GNU Linux-libre 4.9.301-gnu1
[releases.git] / drivers / staging / lustre / lnet / libcfs / workitem.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2012, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * libcfs/libcfs/workitem.c
33  *
34  * Author: Isaac Huang <isaac@clusterfs.com>
35  *       Liang Zhen  <zhen.liang@sun.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_LNET
39
40 #include "../../include/linux/libcfs/libcfs.h"
41
42 #define CFS_WS_NAME_LEN  16
43
struct cfs_wi_sched {
	/* chain on the global cfs_wi_data::wi_scheds list */
	struct list_head		ws_list;
	/** serialised workitems; protects ws_runq/ws_rerunq/ws_nscheduled */
	spinlock_t		ws_lock;
	/** where scheduler threads sleep waiting for work */
	wait_queue_head_t		ws_waitq;
	/** concurrent workitems: queue of items ready to run */
	struct list_head		ws_runq;
	/**
	 * rescheduled running-workitems: a workitem can be rescheduled
	 * while running in wi_action(), but we don't want to execute it
	 * again until it returns from wi_action(), so we put it on
	 * ws_rerunq while rescheduling, and move it to ws_runq after it
	 * returns from wi_action()
	 */
	struct list_head		ws_rerunq;
	/** CPT-table for this scheduler */
	struct cfs_cpt_table	*ws_cptab;
	/** CPT id for affinity */
	int			ws_cpt;
	/** number of scheduled workitems */
	int			ws_nscheduled;
	/** started scheduler threads, protected by cfs_wi_data::wi_glock */
	unsigned int		ws_nthreads:30;
	/** shutting down, protected by cfs_wi_data::wi_glock */
	unsigned int		ws_stopping:1;
	/** serialize starting thread, protected by cfs_wi_data::wi_glock */
	unsigned int		ws_starting:1;
	/** scheduler name (NUL-terminated, used for kthread names) */
	char			ws_name[CFS_WS_NAME_LEN];
};
76
/* module-global state: registry of all workitem schedulers */
static struct cfs_workitem_data {
	/** serialize; protects wi_scheds and per-sched thread counters */
	spinlock_t		wi_glock;
	/** list of all schedulers */
	struct list_head		wi_scheds;
	/** WI module is initialized */
	int			wi_init;
	/** shutting down the whole WI module */
	int			wi_stopping;
} cfs_wi_data;
87
88 static inline int
89 cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
90 {
91         spin_lock(&sched->ws_lock);
92         if (sched->ws_stopping) {
93                 spin_unlock(&sched->ws_lock);
94                 return 0;
95         }
96
97         if (!list_empty(&sched->ws_runq)) {
98                 spin_unlock(&sched->ws_lock);
99                 return 0;
100         }
101         spin_unlock(&sched->ws_lock);
102         return 1;
103 }
104
/* XXX:
 * 0. it only works when called from wi->wi_action.
 * 1. when it returns no one shall try to schedule the workitem.
 */
void
cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
	LASSERT(!in_interrupt()); /* because we use plain spinlock */
	LASSERT(!sched->ws_stopping);

	spin_lock(&sched->ws_lock);

	/* caller must be running inside wi_action(), see XXX above */
	LASSERT(wi->wi_running);
	if (wi->wi_scheduled) { /* cancel pending schedules */
		LASSERT(!list_empty(&wi->wi_list));
		list_del_init(&wi->wi_list);

		LASSERT(sched->ws_nscheduled > 0);
		sched->ws_nscheduled--;
	}

	LASSERT(list_empty(&wi->wi_list));

	/* leave wi_scheduled set with wi_list empty: any later
	 * cfs_wi_schedule() on this dead workitem will skip the enqueue
	 * and trip its !list_empty() LASSERT
	 */
	wi->wi_scheduled = 1; /* LBUG future schedule attempts */
	spin_unlock(&sched->ws_lock);
}
EXPORT_SYMBOL(cfs_wi_exit);
132
133 /**
134  * cancel schedule request of workitem \a wi
135  */
136 int
137 cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
138 {
139         int     rc;
140
141         LASSERT(!in_interrupt()); /* because we use plain spinlock */
142         LASSERT(!sched->ws_stopping);
143
144         /*
145          * return 0 if it's running already, otherwise return 1, which
146          * means the workitem will not be scheduled and will not have
147          * any race with wi_action.
148          */
149         spin_lock(&sched->ws_lock);
150
151         rc = !(wi->wi_running);
152
153         if (wi->wi_scheduled) { /* cancel pending schedules */
154                 LASSERT(!list_empty(&wi->wi_list));
155                 list_del_init(&wi->wi_list);
156
157                 LASSERT(sched->ws_nscheduled > 0);
158                 sched->ws_nscheduled--;
159
160                 wi->wi_scheduled = 0;
161         }
162
163         LASSERT(list_empty(&wi->wi_list));
164
165         spin_unlock(&sched->ws_lock);
166         return rc;
167 }
168 EXPORT_SYMBOL(cfs_wi_deschedule);
169
170 /*
171  * Workitem scheduled with (serial == 1) is strictly serialised not only with
172  * itself, but also with others scheduled this way.
173  *
174  * Now there's only one static serialised queue, but in the future more might
175  * be added, and even dynamic creation of serialised queues might be supported.
176  */
177 void
178 cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
179 {
180         LASSERT(!in_interrupt()); /* because we use plain spinlock */
181         LASSERT(!sched->ws_stopping);
182
183         spin_lock(&sched->ws_lock);
184
185         if (!wi->wi_scheduled) {
186                 LASSERT(list_empty(&wi->wi_list));
187
188                 wi->wi_scheduled = 1;
189                 sched->ws_nscheduled++;
190                 if (!wi->wi_running) {
191                         list_add_tail(&wi->wi_list, &sched->ws_runq);
192                         wake_up(&sched->ws_waitq);
193                 } else {
194                         list_add(&wi->wi_list, &sched->ws_rerunq);
195                 }
196         }
197
198         LASSERT(!list_empty(&wi->wi_list));
199         spin_unlock(&sched->ws_lock);
200 }
201 EXPORT_SYMBOL(cfs_wi_schedule);
202
/*
 * Body of one scheduler kthread: repeatedly pop workitems from ws_runq
 * and run their wi_action() callbacks until ws_stopping is set.
 */
static int cfs_wi_scheduler(void *arg)
{
	struct cfs_wi_sched	*sched = (struct cfs_wi_sched *)arg;

	cfs_block_allsigs();

	/* CPT affinity scheduler? */
	if (sched->ws_cptab)
		if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
			CWARN("Failed to bind %s on CPT %d\n",
			      sched->ws_name, sched->ws_cpt);

	/* announce startup: ws_starting gates thread creation in
	 * cfs_wi_sched_create(), ws_nthreads is our liveness count
	 */
	spin_lock(&cfs_wi_data.wi_glock);

	LASSERT(sched->ws_starting == 1);
	sched->ws_starting--;
	sched->ws_nthreads++;

	spin_unlock(&cfs_wi_data.wi_glock);

	spin_lock(&sched->ws_lock);

	while (!sched->ws_stopping) {
		int	     nloops = 0;
		int	     rc;
		struct cfs_workitem *wi;

		/* run at most CFS_WI_RESCHED items before yielding */
		while (!list_empty(&sched->ws_runq) &&
		       nloops < CFS_WI_RESCHED) {
			wi = list_entry(sched->ws_runq.next,
					struct cfs_workitem, wi_list);
			LASSERT(wi->wi_scheduled && !wi->wi_running);

			list_del_init(&wi->wi_list);

			LASSERT(sched->ws_nscheduled > 0);
			sched->ws_nscheduled--;

			wi->wi_running	 = 1;
			wi->wi_scheduled = 0;

			/* drop ws_lock across the callback: wi_action()
			 * may itself call cfs_wi_schedule()/cfs_wi_exit()
			 */
			spin_unlock(&sched->ws_lock);
			nloops++;

			rc = (*wi->wi_action) (wi);

			spin_lock(&sched->ws_lock);
			if (rc != 0) /* WI should be dead, even be freed! */
				continue;

			wi->wi_running = 0;
			if (list_empty(&wi->wi_list))
				continue;

			LASSERT(wi->wi_scheduled);
			/* wi is rescheduled, should be on rerunq now, we
			 * move it to runq so it can run action now
			 */
			list_move_tail(&wi->wi_list, &sched->ws_runq);
		}

		if (!list_empty(&sched->ws_runq)) {
			spin_unlock(&sched->ws_lock);
			/* don't sleep because some workitems still
			 * expect me to come back soon
			 */
			cond_resched();
			spin_lock(&sched->ws_lock);
			continue;
		}

		spin_unlock(&sched->ws_lock);
		rc = wait_event_interruptible_exclusive(sched->ws_waitq,
						!cfs_wi_sched_cansleep(sched));
		spin_lock(&sched->ws_lock);
	}

	spin_unlock(&sched->ws_lock);

	/* tell cfs_wi_sched_destroy()/cfs_wi_shutdown() we are gone */
	spin_lock(&cfs_wi_data.wi_glock);
	sched->ws_nthreads--;
	spin_unlock(&cfs_wi_data.wi_glock);

	return 0;
}
288
/*
 * Stop a scheduler: flag it stopping, wake its threads, poll until all
 * of them have exited, then unlink and free the scheduler.  A second
 * concurrent caller returns early once ws_stopping is observed set.
 */
void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
	int	i;

	LASSERT(cfs_wi_data.wi_init);
	LASSERT(!cfs_wi_data.wi_stopping);

	spin_lock(&cfs_wi_data.wi_glock);
	if (sched->ws_stopping) {
		CDEBUG(D_INFO, "%s is in progress of stopping\n",
		       sched->ws_name);
		spin_unlock(&cfs_wi_data.wi_glock);
		return;
	}

	LASSERT(!list_empty(&sched->ws_list));
	sched->ws_stopping = 1;

	spin_unlock(&cfs_wi_data.wi_glock);

	i = 2;
	wake_up_all(&sched->ws_waitq);

	spin_lock(&cfs_wi_data.wi_glock);
	while (sched->ws_nthreads > 0) {
		/* warn only at power-of-2 iterations so the log message
		 * backs off exponentially while we keep polling
		 */
		CDEBUG(is_power_of_2(++i) ? D_WARNING : D_NET,
		       "waiting for %d threads of WI sched[%s] to terminate\n",
		       sched->ws_nthreads, sched->ws_name);

		/* drop the lock and nap ~50ms before re-checking */
		spin_unlock(&cfs_wi_data.wi_glock);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1) / 20);
		spin_lock(&cfs_wi_data.wi_glock);
	}

	list_del(&sched->ws_list);

	spin_unlock(&cfs_wi_data.wi_glock);
	/* all threads exited, so nothing can be queued anymore */
	LASSERT(sched->ws_nscheduled == 0);

	LIBCFS_FREE(sched, sizeof(*sched));
}
EXPORT_SYMBOL(cfs_wi_sched_destroy);
333
334 int
335 cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
336                     int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
337 {
338         struct cfs_wi_sched     *sched;
339         int                     rc;
340
341         LASSERT(cfs_wi_data.wi_init);
342         LASSERT(!cfs_wi_data.wi_stopping);
343         LASSERT(!cptab || cpt == CFS_CPT_ANY ||
344                 (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
345
346         LIBCFS_ALLOC(sched, sizeof(*sched));
347         if (!sched)
348                 return -ENOMEM;
349
350         if (strlen(name) > sizeof(sched->ws_name) - 1) {
351                 LIBCFS_FREE(sched, sizeof(*sched));
352                 return -E2BIG;
353         }
354         strncpy(sched->ws_name, name, sizeof(sched->ws_name));
355
356         sched->ws_cptab = cptab;
357         sched->ws_cpt = cpt;
358
359         spin_lock_init(&sched->ws_lock);
360         init_waitqueue_head(&sched->ws_waitq);
361         INIT_LIST_HEAD(&sched->ws_runq);
362         INIT_LIST_HEAD(&sched->ws_rerunq);
363         INIT_LIST_HEAD(&sched->ws_list);
364
365         rc = 0;
366         while (nthrs > 0)  {
367                 char    name[16];
368                 struct task_struct *task;
369
370                 spin_lock(&cfs_wi_data.wi_glock);
371                 while (sched->ws_starting > 0) {
372                         spin_unlock(&cfs_wi_data.wi_glock);
373                         schedule();
374                         spin_lock(&cfs_wi_data.wi_glock);
375                 }
376
377                 sched->ws_starting++;
378                 spin_unlock(&cfs_wi_data.wi_glock);
379
380                 if (sched->ws_cptab && sched->ws_cpt >= 0) {
381                         snprintf(name, sizeof(name), "%s_%02d_%02u",
382                                  sched->ws_name, sched->ws_cpt,
383                                  sched->ws_nthreads);
384                 } else {
385                         snprintf(name, sizeof(name), "%s_%02u",
386                                  sched->ws_name, sched->ws_nthreads);
387                 }
388
389                 task = kthread_run(cfs_wi_scheduler, sched, "%s", name);
390                 if (!IS_ERR(task)) {
391                         nthrs--;
392                         continue;
393                 }
394                 rc = PTR_ERR(task);
395
396                 CERROR("Failed to create thread for WI scheduler %s: %d\n",
397                        name, rc);
398
399                 spin_lock(&cfs_wi_data.wi_glock);
400
401                 /* make up for cfs_wi_sched_destroy */
402                 list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
403                 sched->ws_starting--;
404
405                 spin_unlock(&cfs_wi_data.wi_glock);
406
407                 cfs_wi_sched_destroy(sched);
408                 return rc;
409         }
410         spin_lock(&cfs_wi_data.wi_glock);
411         list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
412         spin_unlock(&cfs_wi_data.wi_glock);
413
414         *sched_pp = sched;
415         return 0;
416 }
417 EXPORT_SYMBOL(cfs_wi_sched_create);
418
419 int
420 cfs_wi_startup(void)
421 {
422         memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
423
424         spin_lock_init(&cfs_wi_data.wi_glock);
425         INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
426         cfs_wi_data.wi_init = 1;
427
428         return 0;
429 }
430
/*
 * Tear down the workitem module: flag every scheduler stopping, wake
 * their threads, poll until all threads have exited, then free every
 * scheduler.  Runs at module unload, after cfs_wi_startup().
 */
void
cfs_wi_shutdown(void)
{
	struct cfs_wi_sched	*sched;
	struct cfs_wi_sched *temp;

	spin_lock(&cfs_wi_data.wi_glock);
	cfs_wi_data.wi_stopping = 1;
	spin_unlock(&cfs_wi_data.wi_glock);

	/* nobody should contend on this list */
	list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
		sched->ws_stopping = 1;
		wake_up_all(&sched->ws_waitq);
	}

	/* wait for every scheduler's threads to exit, ~50ms per poll */
	list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
		spin_lock(&cfs_wi_data.wi_glock);

		while (sched->ws_nthreads != 0) {
			spin_unlock(&cfs_wi_data.wi_glock);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(cfs_time_seconds(1) / 20);
			spin_lock(&cfs_wi_data.wi_glock);
		}
		spin_unlock(&cfs_wi_data.wi_glock);
	}
	/* all threads are gone: safe to unlink and free each scheduler */
	list_for_each_entry_safe(sched, temp, &cfs_wi_data.wi_scheds, ws_list) {
		list_del(&sched->ws_list);
		LIBCFS_FREE(sched, sizeof(*sched));
	}

	cfs_wi_data.wi_stopping = 0;
	cfs_wi_data.wi_init = 0;
}