/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/workitem.c
 *
 * Author: Isaac Huang <isaac@clusterfs.com>
 *         Liang Zhen <zhen.liang@sun.com>
 */
#define DEBUG_SUBSYSTEM S_LNET

#include "../../include/linux/libcfs/libcfs.h"

#define CFS_WS_NAME_LEN		16
struct cfs_wi_sched {
	/* chain on global list */
	struct list_head	ws_list;
	/** lock protecting the workitem queues below */
	spinlock_t		ws_lock;
	/** where scheduler threads sleep */
	wait_queue_head_t	ws_waitq;
	/** concurrent workitems */
	struct list_head	ws_runq;
	/**
	 * rescheduled running-workitems: a workitem can be rescheduled
	 * while running in wi_action(), but we don't want to execute it
	 * again until it returns from wi_action(), so we put it on
	 * ws_rerunq while rescheduling, and move it back to ws_runq
	 * after it returns.
	 */
	struct list_head	ws_rerunq;
	/** CPT-table for this scheduler */
	struct cfs_cpt_table	*ws_cptab;
	/** CPT id for affinity */
	int			ws_cpt;
	/** number of scheduled workitems */
	int			ws_nscheduled;
	/** number of started scheduler threads, protected by cfs_wi_data::wi_glock */
	unsigned int		ws_nthreads:30;
	/** shutting down, protected by cfs_wi_data::wi_glock */
	unsigned int		ws_stopping:1;
	/** serialize starting thread, protected by cfs_wi_data::wi_glock */
	unsigned int		ws_starting:1;
	/** scheduler name */
	char			ws_name[CFS_WS_NAME_LEN];
};
static struct cfs_workitem_data {
	/** lock serialising access to this structure */
	spinlock_t		wi_glock;
	/** list of all schedulers */
	struct list_head	wi_scheds;
	/** WI module is initialized */
	int			wi_init;
	/** shutting down the whole WI module */
	int			wi_stopping;
} cfs_wi_data;
static inline int
cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
{
	spin_lock(&sched->ws_lock);
	if (sched->ws_stopping) {
		spin_unlock(&sched->ws_lock);
		return 0;
	}

	if (!list_empty(&sched->ws_runq)) {
		spin_unlock(&sched->ws_lock);
		return 0;
	}

	spin_unlock(&sched->ws_lock);
	return 1;
}
/*
 * Notes:
 * 0. it only works when called from wi->wi_action.
 * 1. when it returns, no one shall try to schedule the workitem again.
 */
void
cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
	LASSERT(!in_interrupt()); /* because we use plain spinlock */
	LASSERT(!sched->ws_stopping);

	spin_lock(&sched->ws_lock);

	LASSERT(wi->wi_running);
	if (wi->wi_scheduled) { /* cancel pending schedules */
		LASSERT(!list_empty(&wi->wi_list));
		list_del_init(&wi->wi_list);

		LASSERT(sched->ws_nscheduled > 0);
		sched->ws_nscheduled--;
	}

	LASSERT(list_empty(&wi->wi_list));

	wi->wi_scheduled = 1; /* LBUG future schedule attempts */
	spin_unlock(&sched->ws_lock);
}
EXPORT_SYMBOL(cfs_wi_exit);
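/*
 * Usage sketch (illustrative only, not part of the original code):
 * cfs_wi_exit() is meant to be called from inside the workitem's own
 * wi_action callback, just before the callback tears the item down and
 * returns non-zero so the scheduler never touches the workitem again.
 * "my_obj", "my_obj_done", "my_obj_free" and "my_sched" are hypothetical
 * caller-side names.
 *
 *	static int my_action(struct cfs_workitem *wi)
 *	{
 *		struct my_obj *obj = wi->wi_data;
 *
 *		if (my_obj_done(obj)) {
 *			cfs_wi_exit(my_sched, wi); // block further scheduling
 *			my_obj_free(obj);          // wi is embedded in obj
 *			return 1;                  // non-zero: workitem is dead
 *		}
 *		// otherwise do one batch of work and stay alive
 *		return 0;
 *	}
 */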
/**
 * cancel schedule request of workitem \a wi
 */
int
cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
	int rc;

	LASSERT(!in_interrupt()); /* because we use plain spinlock */
	LASSERT(!sched->ws_stopping);

	/*
	 * return 0 if it's running already; otherwise return 1, which
	 * means the workitem will not be scheduled and will not have
	 * any race with wi_action.
	 */
	spin_lock(&sched->ws_lock);

	rc = !(wi->wi_running);

	if (wi->wi_scheduled) { /* cancel pending schedules */
		LASSERT(!list_empty(&wi->wi_list));
		list_del_init(&wi->wi_list);

		LASSERT(sched->ws_nscheduled > 0);
		sched->ws_nscheduled--;

		wi->wi_scheduled = 0;
	}

	LASSERT(list_empty(&wi->wi_list));

	spin_unlock(&sched->ws_lock);
	return rc;
}
EXPORT_SYMBOL(cfs_wi_deschedule);
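/*
 * Usage sketch (illustrative only): a caller tearing a workitem down from
 * outside wi_action can use the return value to decide whether it is safe
 * to free the embedding object immediately.  "my_sched" and "obj" are
 * hypothetical names.
 *
 *	if (cfs_wi_deschedule(my_sched, &obj->mo_wi)) {
 *		// 1: the workitem was not running and will not be run,
 *		// so obj can be freed right away
 *	} else {
 *		// 0: wi_action may still be running; the caller must
 *		// synchronise with it (e.g. via cfs_wi_exit() inside
 *		// wi_action) before freeing obj
 *	}
 */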
/*
 * Workitem scheduled with (serial == 1) is strictly serialised not only with
 * itself, but also with others scheduled this way.
 *
 * Now there's only one static serialised queue, but in the future more might
 * be added, and even dynamic creation of serialised queues might be supported.
 */
void
cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
	LASSERT(!in_interrupt()); /* because we use plain spinlock */
	LASSERT(!sched->ws_stopping);

	spin_lock(&sched->ws_lock);

	if (!wi->wi_scheduled) {
		LASSERT(list_empty(&wi->wi_list));

		wi->wi_scheduled = 1;
		sched->ws_nscheduled++;
		if (!wi->wi_running) {
			list_add_tail(&wi->wi_list, &sched->ws_runq);
			wake_up(&sched->ws_waitq);
		} else {
			list_add(&wi->wi_list, &sched->ws_rerunq);
		}
	}

	LASSERT(!list_empty(&wi->wi_list));
	spin_unlock(&sched->ws_lock);
}
EXPORT_SYMBOL(cfs_wi_schedule);
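/*
 * Usage sketch (illustrative only): a caller normally embeds a
 * struct cfs_workitem in its own object, initialises it with the usual
 * cfs_wi_init() helper and queues it here.  Scheduling is idempotent:
 * calling cfs_wi_schedule() on an already-scheduled workitem is a no-op,
 * and rescheduling a running workitem only defers it via ws_rerunq.
 * "my_obj", "my_action" and "my_sched" are hypothetical names.
 *
 *	struct my_obj {
 *		struct cfs_workitem	mo_wi;
 *		// caller-private fields
 *	};
 *
 *	cfs_wi_init(&obj->mo_wi, obj, my_action);
 *	cfs_wi_schedule(my_sched, &obj->mo_wi); // my_action(&obj->mo_wi) will
 *						// run on one of my_sched's threads
 */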
static int cfs_wi_scheduler(void *arg)
{
	struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;

	/* CPT affinity scheduler? */
	if (sched->ws_cptab)
		if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
			CWARN("Failed to bind %s on CPT %d\n",
			      sched->ws_name, sched->ws_cpt);

	spin_lock(&cfs_wi_data.wi_glock);

	LASSERT(sched->ws_starting == 1);
	sched->ws_starting--;
	sched->ws_nthreads++;

	spin_unlock(&cfs_wi_data.wi_glock);

	spin_lock(&sched->ws_lock);

	while (!sched->ws_stopping) {
		int nloops = 0;
		int rc;
		struct cfs_workitem *wi;

		while (!list_empty(&sched->ws_runq) &&
		       nloops < CFS_WI_RESCHED) {
			wi = list_entry(sched->ws_runq.next,
					struct cfs_workitem, wi_list);
			LASSERT(wi->wi_scheduled && !wi->wi_running);

			list_del_init(&wi->wi_list);

			LASSERT(sched->ws_nscheduled > 0);
			sched->ws_nscheduled--;

			wi->wi_running = 1;
			wi->wi_scheduled = 0;

			spin_unlock(&sched->ws_lock);
			nloops++;

			rc = (*wi->wi_action)(wi);

			spin_lock(&sched->ws_lock);
			if (rc != 0) /* WI should be dead, even be freed! */
				continue;

			wi->wi_running = 0;
			if (list_empty(&wi->wi_list))
				continue;

			LASSERT(wi->wi_scheduled);
			/* wi is rescheduled, should be on rerunq now, we
			 * move it to runq so it can run action now
			 */
			list_move_tail(&wi->wi_list, &sched->ws_runq);
		}

		if (!list_empty(&sched->ws_runq)) {
			spin_unlock(&sched->ws_lock);
			/* don't sleep because some workitems still
			 * expect me to come back soon
			 */
			cond_resched();
			spin_lock(&sched->ws_lock);
			continue;
		}

		spin_unlock(&sched->ws_lock);
		rc = wait_event_interruptible_exclusive(sched->ws_waitq,
							!cfs_wi_sched_cansleep(sched));
		spin_lock(&sched->ws_lock);
	}

	spin_unlock(&sched->ws_lock);

	spin_lock(&cfs_wi_data.wi_glock);
	sched->ws_nthreads--;
	spin_unlock(&cfs_wi_data.wi_glock);

	return 0;
}
void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
	int i = 2;

	LASSERT(cfs_wi_data.wi_init);
	LASSERT(!cfs_wi_data.wi_stopping);

	spin_lock(&cfs_wi_data.wi_glock);
	if (sched->ws_stopping) {
		CDEBUG(D_INFO, "%s is in progress of stopping\n",
		       sched->ws_name);
		spin_unlock(&cfs_wi_data.wi_glock);
		return;
	}

	LASSERT(!list_empty(&sched->ws_list));
	sched->ws_stopping = 1;

	spin_unlock(&cfs_wi_data.wi_glock);

	wake_up_all(&sched->ws_waitq);

	spin_lock(&cfs_wi_data.wi_glock);
	while (sched->ws_nthreads > 0) {
		CDEBUG(is_power_of_2(++i) ? D_WARNING : D_NET,
		       "waiting for %d threads of WI sched[%s] to terminate\n",
		       sched->ws_nthreads, sched->ws_name);

		spin_unlock(&cfs_wi_data.wi_glock);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1) / 20);
		spin_lock(&cfs_wi_data.wi_glock);
	}

	list_del(&sched->ws_list);

	spin_unlock(&cfs_wi_data.wi_glock);

	LASSERT(sched->ws_nscheduled == 0);

	LIBCFS_FREE(sched, sizeof(*sched));
}
EXPORT_SYMBOL(cfs_wi_sched_destroy);
int
cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
		    int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
{
	struct cfs_wi_sched *sched;
	int rc;

	LASSERT(cfs_wi_data.wi_init);
	LASSERT(!cfs_wi_data.wi_stopping);
	LASSERT(!cptab || cpt == CFS_CPT_ANY ||
		(cpt >= 0 && cpt < cfs_cpt_number(cptab)));

	LIBCFS_ALLOC(sched, sizeof(*sched));
	if (!sched)
		return -ENOMEM;

	if (strlen(name) > sizeof(sched->ws_name) - 1) {
		LIBCFS_FREE(sched, sizeof(*sched));
		return -E2BIG;
	}
	strncpy(sched->ws_name, name, sizeof(sched->ws_name));

	sched->ws_cptab = cptab;
	sched->ws_cpt = cpt;

	spin_lock_init(&sched->ws_lock);
	init_waitqueue_head(&sched->ws_waitq);
	INIT_LIST_HEAD(&sched->ws_runq);
	INIT_LIST_HEAD(&sched->ws_rerunq);
	INIT_LIST_HEAD(&sched->ws_list);

	rc = 0;
	while (nthrs > 0) {
		char threadname[16];
		struct task_struct *task;

		spin_lock(&cfs_wi_data.wi_glock);
		while (sched->ws_starting > 0) {
			spin_unlock(&cfs_wi_data.wi_glock);
			schedule();
			spin_lock(&cfs_wi_data.wi_glock);
		}

		sched->ws_starting++;
		spin_unlock(&cfs_wi_data.wi_glock);

		if (sched->ws_cptab && sched->ws_cpt >= 0) {
			snprintf(threadname, sizeof(threadname), "%s_%02d_%02u",
				 sched->ws_name, sched->ws_cpt,
				 sched->ws_nthreads);
		} else {
			snprintf(threadname, sizeof(threadname), "%s_%02u",
				 sched->ws_name, sched->ws_nthreads);
		}

		task = kthread_run(cfs_wi_scheduler, sched, "%s", threadname);
		if (!IS_ERR(task)) {
			nthrs--;
			continue;
		}
		rc = PTR_ERR(task);

		CERROR("Failed to create thread for WI scheduler %s: %d\n",
		       threadname, rc);

		spin_lock(&cfs_wi_data.wi_glock);
		/* make up for cfs_wi_sched_destroy */
		list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
		sched->ws_starting--;
		spin_unlock(&cfs_wi_data.wi_glock);

		cfs_wi_sched_destroy(sched);
		return rc;
	}

	spin_lock(&cfs_wi_data.wi_glock);
	list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
	spin_unlock(&cfs_wi_data.wi_glock);

	*sched_pp = sched;
	return 0;
}
EXPORT_SYMBOL(cfs_wi_sched_create);
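/*
 * Usage sketch (illustrative only): creating and destroying a scheduler.
 * The name is limited to CFS_WS_NAME_LEN - 1 characters; passing a NULL
 * cptab (or cpt == CFS_CPT_ANY) creates threads without CPT affinity.
 * "my_sched" is a hypothetical variable.
 *
 *	struct cfs_wi_sched *my_sched;
 *	int rc;
 *
 *	rc = cfs_wi_sched_create("my_wi", NULL, CFS_CPT_ANY, 4, &my_sched);
 *	if (rc != 0)
 *		return rc;
 *
 *	// schedule workitems on my_sched here
 *
 *	cfs_wi_sched_destroy(my_sched);
 */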
int
cfs_wi_startup(void)
{
	memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));

	spin_lock_init(&cfs_wi_data.wi_glock);
	INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
	cfs_wi_data.wi_init = 1;

	return 0;
}
void
cfs_wi_shutdown(void)
{
	struct cfs_wi_sched *sched;
	struct cfs_wi_sched *temp;

	spin_lock(&cfs_wi_data.wi_glock);
	cfs_wi_data.wi_stopping = 1;
	spin_unlock(&cfs_wi_data.wi_glock);

	/* nobody should contend on this list */
	list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
		sched->ws_stopping = 1;
		wake_up_all(&sched->ws_waitq);
	}

	list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
		spin_lock(&cfs_wi_data.wi_glock);

		while (sched->ws_nthreads != 0) {
			spin_unlock(&cfs_wi_data.wi_glock);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(cfs_time_seconds(1) / 20);
			spin_lock(&cfs_wi_data.wi_glock);
		}
		spin_unlock(&cfs_wi_data.wi_glock);
	}

	list_for_each_entry_safe(sched, temp, &cfs_wi_data.wi_scheds, ws_list) {
		list_del(&sched->ws_list);
		LIBCFS_FREE(sched, sizeof(*sched));
	}

	cfs_wi_data.wi_stopping = 0;
	cfs_wi_data.wi_init = 0;
}