4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
19 * Copyright (c) 2011 Intel Corporation
21 * Copyright 2012 Xyratex Technology Limited
26 * Network Request Scheduler (NRS)
28 * Allows to reorder the handling of RPCs at servers.
30 * Author: Liang Zhen <liang@whamcloud.com>
31 * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
38 #define DEBUG_SUBSYSTEM S_RPC
39 #include <obd_support.h>
40 #include <obd_class.h>
41 #include <lustre_net.h>
42 #include <lprocfs_status.h>
43 #include <linux/libcfs/libcfs.h>
44 #include "ptlrpc_internal.h"
49 struct nrs_core nrs_core;
51 static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
53 return policy->pol_desc->pd_ops->op_policy_init ?
54 policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
57 static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
59 LASSERT(policy->pol_ref == 0);
60 LASSERT(policy->pol_req_queued == 0);
62 if (policy->pol_desc->pd_ops->op_policy_fini)
63 policy->pol_desc->pd_ops->op_policy_fini(policy);
66 static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
67 enum ptlrpc_nrs_ctl opc, void *arg)
70 * The policy may be stopped, but the lprocfs files and
71 * ptlrpc_nrs_policy instances remain present until unregistration time.
72 * Do not perform the ctl operation if the policy is stopped, as
73 * policy->pol_private will be NULL in such a case.
75 if (policy->pol_state == NRS_POL_STATE_STOPPED)
78 return policy->pol_desc->pd_ops->op_policy_ctl ?
79 policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
83 static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
85 if (policy->pol_desc->pd_ops->op_policy_stop)
86 policy->pol_desc->pd_ops->op_policy_stop(policy);
88 LASSERT(list_empty(&policy->pol_list_queued));
89 LASSERT(policy->pol_req_queued == 0 &&
90 policy->pol_req_started == 0);
92 policy->pol_private = NULL;
94 policy->pol_state = NRS_POL_STATE_STOPPED;
96 if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
97 module_put(policy->pol_desc->pd_owner);
100 static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
102 struct ptlrpc_nrs *nrs = policy->pol_nrs;
104 if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
107 if (policy->pol_state == NRS_POL_STATE_STARTING)
110 /* In progress or already stopped */
111 if (policy->pol_state != NRS_POL_STATE_STARTED)
114 policy->pol_state = NRS_POL_STATE_STOPPING;
116 /* Immediately make it invisible */
117 if (nrs->nrs_policy_primary == policy) {
118 nrs->nrs_policy_primary = NULL;
121 LASSERT(nrs->nrs_policy_fallback == policy);
122 nrs->nrs_policy_fallback = NULL;
125 /* I have the only refcount */
126 if (policy->pol_ref == 1)
127 nrs_policy_stop0(policy);
133 * Transitions the \a nrs NRS head's primary policy to
134 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
135 * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
137 * \param[in] nrs the NRS head to carry out this operation on
139 static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
141 struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
146 nrs->nrs_policy_primary = NULL;
148 LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
149 tmp->pol_state = NRS_POL_STATE_STOPPING;
151 if (tmp->pol_ref == 0)
152 nrs_policy_stop0(tmp);
156 * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
157 * response to an lprocfs command to start a policy.
159 * If a primary policy different to the current one is specified, this function
160 * will transition the new policy to the
161 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
162 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
163 * the old primary policy (if there is one) to
164 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
165 * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED.
167 * If the fallback policy is specified, this is taken to indicate an instruction
168 * to stop the current primary policy, without substituting it with another
169 * primary policy, so the primary policy (if any) is transitioned to
170 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
171 * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In
172 * this case, the fallback policy is only left active in the NRS head.
174 static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
176 struct ptlrpc_nrs *nrs = policy->pol_nrs;
180 * Don't allow multiple starting which is too complex, and has no real
183 if (nrs->nrs_policy_starting)
186 LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
188 if (policy->pol_state == NRS_POL_STATE_STOPPING)
191 if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
193 * This is for cases in which the user sets the policy to the
194 * fallback policy (currently fifo for all services); i.e. the
195 * user is resetting the policy to the default; so we stop the
196 * primary policy, if any.
198 if (policy == nrs->nrs_policy_fallback) {
199 nrs_policy_stop_primary(nrs);
204 * If we reach here, we must be setting up the fallback policy
205 * at service startup time, and only a single policy with the
206 * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
207 * register with NRS core.
209 LASSERT(!nrs->nrs_policy_fallback);
212 * Shouldn't start primary policy if w/o fallback policy.
214 if (!nrs->nrs_policy_fallback)
217 if (policy->pol_state == NRS_POL_STATE_STARTED)
222 * Increase the module usage count for policies registering from other
225 if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
226 !try_module_get(policy->pol_desc->pd_owner)) {
227 atomic_dec(&policy->pol_desc->pd_refs);
228 CERROR("NRS: cannot get module for policy %s; is it alive?\n",
229 policy->pol_desc->pd_name);
234 * Serialize policy starting across the NRS head
236 nrs->nrs_policy_starting = 1;
238 policy->pol_state = NRS_POL_STATE_STARTING;
240 if (policy->pol_desc->pd_ops->op_policy_start) {
241 spin_unlock(&nrs->nrs_lock);
243 rc = policy->pol_desc->pd_ops->op_policy_start(policy);
245 spin_lock(&nrs->nrs_lock);
247 if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
248 module_put(policy->pol_desc->pd_owner);
250 policy->pol_state = NRS_POL_STATE_STOPPED;
255 policy->pol_state = NRS_POL_STATE_STARTED;
257 if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
259 * This path is only used at PTLRPC service setup time.
261 nrs->nrs_policy_fallback = policy;
264 * Try to stop the current primary policy if there is one.
266 nrs_policy_stop_primary(nrs);
269 * And set the newly-started policy as the primary one.
271 nrs->nrs_policy_primary = policy;
275 nrs->nrs_policy_starting = 0;
281 * Increases the policy's usage reference count.
283 static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
289 * Decreases the policy's usage reference count, and stops the policy in case it
290 * was already stopping and have no more outstanding usage references (which
291 * indicates it has no more queued or started requests, and can be safely
294 static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
296 LASSERT(policy->pol_ref > 0);
299 if (unlikely(policy->pol_ref == 0 &&
300 policy->pol_state == NRS_POL_STATE_STOPPING))
301 nrs_policy_stop0(policy);
304 static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
306 spin_lock(&policy->pol_nrs->nrs_lock);
307 nrs_policy_put_locked(policy);
308 spin_unlock(&policy->pol_nrs->nrs_lock);
312 * Find and return a policy by name.
314 static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
317 struct ptlrpc_nrs_policy *tmp;
319 list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
320 if (strncmp(tmp->pol_desc->pd_name, name,
321 NRS_POL_NAME_MAX) == 0) {
322 nrs_policy_get_locked(tmp);
330 * Release references for the resource hierarchy moving upwards towards the
331 * policy instance resource.
333 static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
335 struct ptlrpc_nrs_policy *policy = res->res_policy;
337 if (policy->pol_desc->pd_ops->op_res_put) {
338 struct ptlrpc_nrs_resource *parent;
340 for (; res; res = parent) {
341 parent = res->res_parent;
342 policy->pol_desc->pd_ops->op_res_put(policy, res);
348 * Obtains references for each resource in the resource hierarchy for request
349 * \a nrq if it is to be handled by \a policy.
351 * \param[in] policy the policy
352 * \param[in] nrq the request
353 * \param[in] moving_req denotes whether this is a call to the function by
354 * ldlm_lock_reorder_req(), in order to move \a nrq to
355 * the high-priority NRS head; we should not sleep when
358 * \retval NULL resource hierarchy references not obtained
359 * \retval valid-pointer the bottom level of the resource hierarchy
361 * \see ptlrpc_nrs_pol_ops::op_res_get()
364 struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
365 struct ptlrpc_nrs_request *nrq,
369 * Set to NULL to traverse the resource hierarchy from the top.
371 struct ptlrpc_nrs_resource *res = NULL;
372 struct ptlrpc_nrs_resource *tmp = NULL;
376 rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
380 nrs_resource_put(res);
384 tmp->res_parent = res;
385 tmp->res_policy = policy;
389 * Return once we have obtained a reference to the bottom level
390 * of the resource hierarchy.
398 * Obtains resources for the resource hierarchies and policy references for
399 * the fallback and current primary policy (if any), that will later be used
400 * to handle request \a nrq.
402 * \param[in] nrs the NRS head instance that will be handling request \a nrq.
403 * \param[in] nrq the request that is being handled.
404 * \param[out] resp the array where references to the resource hierarchy are
406 * \param[in] moving_req is set when obtaining resources while moving a
407 * request from a policy on the regular NRS head to a
408 * policy on the HP NRS head (via
409 * ldlm_lock_reorder_req()). It signifies that
410 * allocations to get resources should be atomic; for
411 * a full explanation, see comment in
412 * ptlrpc_nrs_pol_ops::op_res_get().
414 static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
415 struct ptlrpc_nrs_request *nrq,
416 struct ptlrpc_nrs_resource **resp,
419 struct ptlrpc_nrs_policy *primary = NULL;
420 struct ptlrpc_nrs_policy *fallback = NULL;
422 memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
425 * Obtain policy references.
427 spin_lock(&nrs->nrs_lock);
429 fallback = nrs->nrs_policy_fallback;
430 nrs_policy_get_locked(fallback);
432 primary = nrs->nrs_policy_primary;
434 nrs_policy_get_locked(primary);
436 spin_unlock(&nrs->nrs_lock);
439 * Obtain resource hierarchy references.
441 resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
442 LASSERT(resp[NRS_RES_FALLBACK]);
445 resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
448 * A primary policy may exist which may not wish to serve a
449 * particular request for different reasons; release the
450 * reference on the policy as it will not be used for this
453 if (!resp[NRS_RES_PRIMARY])
454 nrs_policy_put(primary);
459 * Releases references to resource hierarchies and policies, because they are no
460 * longer required; used when request handling has been completed, or the
461 * request is moving to the high priority NRS head.
463 * \param resp the resource hierarchy that is being released
465 * \see ptlrpc_nrs_req_finalize()
467 static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
469 struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
472 for (i = 0; i < NRS_RES_MAX; i++) {
474 pols[i] = resp[i]->res_policy;
475 nrs_resource_put(resp[i]);
482 for (i = 0; i < NRS_RES_MAX; i++) {
484 nrs_policy_put(pols[i]);
489 * Obtains an NRS request from \a policy for handling or examination; the
490 * request should be removed in the 'handling' case.
492 * Calling into this function implies we already know the policy has a request
493 * waiting to be handled.
495 * \param[in] policy the policy from which a request
496 * \param[in] peek when set, signifies that we just want to examine the
497 * request, and not handle it, so the request is not removed
499 * \param[in] force when set, it will force a policy to return a request if it
502 * \retval the NRS request to be handled
505 struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
506 bool peek, bool force)
508 struct ptlrpc_nrs_request *nrq;
510 LASSERT(policy->pol_req_queued > 0);
512 nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
514 LASSERT(ergo(nrq, nrs_request_policy(nrq) == policy));
520 * Enqueues request \a nrq for later handling, via one one the policies for
521 * which resources where earlier obtained via nrs_resource_get_safe(). The
522 * function attempts to enqueue the request first on the primary policy
523 * (if any), since this is the preferred choice.
525 * \param nrq the request being enqueued
527 * \see nrs_resource_get_safe()
529 static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
531 struct ptlrpc_nrs_policy *policy;
536 * Try in descending order, because the primary policy (if any) is
537 * the preferred choice.
539 for (i = NRS_RES_MAX - 1; i >= 0; i--) {
540 if (!nrq->nr_res_ptrs[i])
544 policy = nrq->nr_res_ptrs[i]->res_policy;
546 rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
548 policy->pol_nrs->nrs_req_queued++;
549 policy->pol_req_queued++;
554 * Should never get here, as at least the primary policy's
555 * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
562 * Called when a request has been handled
564 * \param[in] nrs the request that has been handled; can be used for
565 * job/resource control.
567 * \see ptlrpc_nrs_req_stop_nolock()
569 static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
571 struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
573 if (policy->pol_desc->pd_ops->op_req_stop)
574 policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
576 LASSERT(policy->pol_nrs->nrs_req_started > 0);
577 LASSERT(policy->pol_req_started > 0);
579 policy->pol_nrs->nrs_req_started--;
580 policy->pol_req_started--;
584 * Handler for operations that can be carried out on policies.
586 * Handles opcodes that are common to all policy types within NRS core, and
587 * passes any unknown opcodes to the policy-specific control function.
589 * \param[in] nrs the NRS head this policy belongs to.
590 * \param[in] name the human-readable policy name; should be the same as
591 * ptlrpc_nrs_pol_desc::pd_name.
592 * \param[in] opc the opcode of the operation being carried out.
593 * \param[in,out] arg can be used to pass information in and out between when
594 * carrying an operation; usually data that is private to
595 * the policy at some level, or generic policy status
598 * \retval -ve error condition
599 * \retval 0 operation was carried out successfully
601 static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
602 enum ptlrpc_nrs_ctl opc, void *arg)
604 struct ptlrpc_nrs_policy *policy;
607 spin_lock(&nrs->nrs_lock);
609 policy = nrs_policy_find_locked(nrs, name);
615 if (policy->pol_state != NRS_POL_STATE_STARTED &&
616 policy->pol_state != NRS_POL_STATE_STOPPED) {
623 * Unknown opcode, pass it down to the policy-specific control
624 * function for handling.
627 rc = nrs_policy_ctl_locked(policy, opc, arg);
633 case PTLRPC_NRS_CTL_START:
634 rc = nrs_policy_start_locked(policy);
639 nrs_policy_put_locked(policy);
641 spin_unlock(&nrs->nrs_lock);
647 * Unregisters a policy by name.
649 * \param[in] nrs the NRS head this policy belongs to.
650 * \param[in] name the human-readable policy name; should be the same as
651 * ptlrpc_nrs_pol_desc::pd_name
656 static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
658 struct ptlrpc_nrs_policy *policy = NULL;
660 spin_lock(&nrs->nrs_lock);
662 policy = nrs_policy_find_locked(nrs, name);
664 spin_unlock(&nrs->nrs_lock);
666 CERROR("Can't find NRS policy %s\n", name);
670 if (policy->pol_ref > 1) {
671 CERROR("Policy %s is busy with %d references\n", name,
672 (int)policy->pol_ref);
673 nrs_policy_put_locked(policy);
675 spin_unlock(&nrs->nrs_lock);
679 LASSERT(policy->pol_req_queued == 0);
680 LASSERT(policy->pol_req_started == 0);
682 if (policy->pol_state != NRS_POL_STATE_STOPPED) {
683 nrs_policy_stop_locked(policy);
684 LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
687 list_del(&policy->pol_list);
690 nrs_policy_put_locked(policy);
692 spin_unlock(&nrs->nrs_lock);
694 nrs_policy_fini(policy);
696 LASSERT(!policy->pol_private);
703 * Register a policy from \policy descriptor \a desc with NRS head \a nrs.
705 * \param[in] nrs the NRS head on which the policy will be registered.
706 * \param[in] desc the policy descriptor from which the information will be
707 * obtained to register the policy.
712 static int nrs_policy_register(struct ptlrpc_nrs *nrs,
713 struct ptlrpc_nrs_pol_desc *desc)
715 struct ptlrpc_nrs_policy *policy;
716 struct ptlrpc_nrs_policy *tmp;
717 struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
720 LASSERT(desc->pd_ops->op_res_get);
721 LASSERT(desc->pd_ops->op_req_get);
722 LASSERT(desc->pd_ops->op_req_enqueue);
723 LASSERT(desc->pd_ops->op_req_dequeue);
724 LASSERT(desc->pd_compat);
726 policy = kzalloc_node(sizeof(*policy), GFP_NOFS,
727 cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
732 policy->pol_nrs = nrs;
733 policy->pol_desc = desc;
734 policy->pol_state = NRS_POL_STATE_STOPPED;
735 policy->pol_flags = desc->pd_flags;
737 INIT_LIST_HEAD(&policy->pol_list);
738 INIT_LIST_HEAD(&policy->pol_list_queued);
740 rc = nrs_policy_init(policy);
746 spin_lock(&nrs->nrs_lock);
748 tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
750 CERROR("NRS policy %s has been registered, can't register it for %s\n",
751 policy->pol_desc->pd_name,
752 svcpt->scp_service->srv_name);
753 nrs_policy_put_locked(tmp);
755 spin_unlock(&nrs->nrs_lock);
756 nrs_policy_fini(policy);
762 list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
765 if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
766 rc = nrs_policy_start_locked(policy);
768 spin_unlock(&nrs->nrs_lock);
771 (void)nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
777 * Enqueue request \a req using one of the policies its resources are referring
780 * \param[in] req the request to enqueue.
782 static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
784 struct ptlrpc_nrs_policy *policy;
786 LASSERT(req->rq_nrq.nr_initialized);
787 LASSERT(!req->rq_nrq.nr_enqueued);
789 nrs_request_enqueue(&req->rq_nrq);
790 req->rq_nrq.nr_enqueued = 1;
792 policy = nrs_request_policy(&req->rq_nrq);
794 * Add the policy to the NRS head's list of policies with enqueued
795 * requests, if it has not been added there.
797 if (unlikely(list_empty(&policy->pol_list_queued)))
798 list_add_tail(&policy->pol_list_queued,
799 &policy->pol_nrs->nrs_policy_queued);
803 * Enqueue a request on the high priority NRS head.
805 * \param req the request to enqueue.
807 static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
809 int opc = lustre_msg_get_opc(req->rq_reqmsg);
811 spin_lock(&req->rq_lock);
813 ptlrpc_nrs_req_add_nolock(req);
815 DEBUG_REQ(D_NET, req, "high priority req");
816 spin_unlock(&req->rq_lock);
820 * Returns a boolean predicate indicating whether the policy described by
821 * \a desc is adequate for use with service \a svc.
823 * \param[in] svc the service
824 * \param[in] desc the policy descriptor
826 * \retval false the policy is not compatible with the service
827 * \retval true the policy is compatible with the service
829 static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
830 const struct ptlrpc_nrs_pol_desc *desc)
832 return desc->pd_compat(svc, desc);
836 * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
839 * \param[in] nrs the NRS head
844 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
846 * \see ptlrpc_service_nrs_setup()
848 static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
850 struct ptlrpc_nrs_pol_desc *desc;
851 /* for convenience */
852 struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
853 struct ptlrpc_service *svc = svcpt->scp_service;
856 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
858 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
859 if (nrs_policy_compatible(svc, desc)) {
860 rc = nrs_policy_register(nrs, desc);
862 CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
863 desc->pd_name, svcpt->scp_cpt,
866 * Fail registration if any of the policies'
867 * registration fails.
878 * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
879 * compatible policies in NRS core, with the NRS head.
881 * \param[in] nrs the NRS head
882 * \param[in] svcpt the PTLRPC service partition to setup
887 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
889 static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
890 struct ptlrpc_service_part *svcpt)
892 enum ptlrpc_nrs_queue_type queue;
894 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
896 if (nrs == &svcpt->scp_nrs_reg)
897 queue = PTLRPC_NRS_QUEUE_REG;
898 else if (nrs == svcpt->scp_nrs_hp)
899 queue = PTLRPC_NRS_QUEUE_HP;
903 nrs->nrs_svcpt = svcpt;
904 nrs->nrs_queue_type = queue;
905 spin_lock_init(&nrs->nrs_lock);
906 INIT_LIST_HEAD(&nrs->nrs_policy_list);
907 INIT_LIST_HEAD(&nrs->nrs_policy_queued);
909 return nrs_register_policies_locked(nrs);
913 * Allocates a regular and optionally a high-priority NRS head (if the service
914 * handles high-priority RPCs), and then registers all available compatible
915 * policies on those NRS heads.
917 * \param[in,out] svcpt the PTLRPC service partition to setup
919 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
921 static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
923 struct ptlrpc_nrs *nrs;
926 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
929 * Initialize the regular NRS head.
931 nrs = nrs_svcpt2nrs(svcpt, false);
932 rc = nrs_svcpt_setup_locked0(nrs, svcpt);
937 * Optionally allocate a high-priority NRS head.
939 if (!svcpt->scp_service->srv_ops.so_hpreq_handler)
943 kzalloc_node(sizeof(*svcpt->scp_nrs_hp), GFP_NOFS,
944 cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
946 if (!svcpt->scp_nrs_hp) {
951 nrs = nrs_svcpt2nrs(svcpt, true);
952 rc = nrs_svcpt_setup_locked0(nrs, svcpt);
959 * Unregisters all policies on all available NRS heads in a service partition;
960 * called at PTLRPC service unregistration time.
962 * \param[in] svcpt the PTLRPC service partition
964 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
966 static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
968 struct ptlrpc_nrs *nrs;
969 struct ptlrpc_nrs_policy *policy;
970 struct ptlrpc_nrs_policy *tmp;
974 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
977 /* scp_nrs_hp could be NULL due to short of memory. */
978 nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
979 /* check the nrs_svcpt to see if nrs is initialized. */
980 if (!nrs || !nrs->nrs_svcpt)
982 nrs->nrs_stopping = 1;
984 list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) {
985 rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
990 * If the service partition has an HP NRS head, clean that up as well.
992 if (!hp && nrs_svcpt_has_hp(svcpt)) {
1002 * Returns the descriptor for a policy as identified by by \a name.
1004 * \param[in] name the policy name
1006 * \retval the policy descriptor
1009 static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
1011 struct ptlrpc_nrs_pol_desc *tmp;
1013 list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
1014 if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
1021 * Removes the policy from all supported NRS heads of all partitions of all
1024 * \param[in] desc the policy descriptor to unregister
1027 * \retval 0 successfully unregistered policy on all supported NRS heads
1029 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
1030 * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
1032 static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
1034 struct ptlrpc_nrs *nrs;
1035 struct ptlrpc_service *svc;
1036 struct ptlrpc_service_part *svcpt;
1040 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
1041 LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
1043 list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1044 if (!nrs_policy_compatible(svc, desc) ||
1045 unlikely(svc->srv_is_stopping))
1048 ptlrpc_service_for_each_part(svcpt, i, svc) {
1052 nrs = nrs_svcpt2nrs(svcpt, hp);
1053 rc = nrs_policy_unregister(nrs, desc->pd_name);
1055 * Ignore -ENOENT as the policy may not have registered
1056 * successfully on all service partitions.
1058 if (rc == -ENOENT) {
1060 } else if (rc != 0) {
1061 CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
1062 desc->pd_name, svcpt->scp_cpt,
1063 svcpt->scp_service->srv_name, rc);
1067 if (!hp && nrs_svc_has_hp(svc)) {
1073 if (desc->pd_ops->op_lprocfs_fini)
1074 desc->pd_ops->op_lprocfs_fini(svc);
1081 * Registers a new policy with NRS core.
1083 * The function will only succeed if policy registration with all compatible
1084 * service partitions (if any) is successful.
1086 * N.B. This function should be called either at ptlrpc module initialization
1087 * time when registering a policy that ships with NRS core, or in a
1088 * module's init() function for policies registering from other modules.
1090 * \param[in] conf configuration information for the new policy to register
1095 static int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
1097 struct ptlrpc_service *svc;
1098 struct ptlrpc_nrs_pol_desc *desc;
1102 LASSERT(conf->nc_ops);
1103 LASSERT(conf->nc_compat);
1104 LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
1105 conf->nc_compat_svc_name));
1106 LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
1109 conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
1112 * External policies are not allowed to start immediately upon
1113 * registration, as there is a relatively higher chance that their
1114 * registration might fail. In such a case, some policy instances may
1115 * already have requests queued wen unregistration needs to happen as
1116 * part o cleanup; since there is currently no way to drain requests
1117 * from a policy unless the service is unregistering, we just disallow
1120 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
1121 (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
1122 PTLRPC_NRS_FL_REG_START))) {
1123 CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
1128 mutex_lock(&nrs_core.nrs_mutex);
1130 if (nrs_policy_find_desc_locked(conf->nc_name)) {
1131 CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
1137 desc = kzalloc(sizeof(*desc), GFP_NOFS);
1143 len = strlcpy(desc->pd_name, conf->nc_name, sizeof(desc->pd_name));
1144 if (len >= sizeof(desc->pd_name)) {
1149 desc->pd_ops = conf->nc_ops;
1150 desc->pd_compat = conf->nc_compat;
1151 desc->pd_compat_svc_name = conf->nc_compat_svc_name;
1152 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
1153 desc->pd_owner = conf->nc_owner;
1154 desc->pd_flags = conf->nc_flags;
1155 atomic_set(&desc->pd_refs, 0);
1158 * For policies that are held in the same module as NRS (currently
1159 * ptlrpc), do not register the policy with all compatible services,
1160 * as the services will not have started at this point, since we are
1161 * calling from ptlrpc module initialization code. In such cases each
1162 * service will register all compatible policies later, via
1163 * ptlrpc_service_nrs_setup().
1165 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
1169 * Register the new policy on all compatible services
1171 mutex_lock(&ptlrpc_all_services_mutex);
1173 list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1174 struct ptlrpc_service_part *svcpt;
1178 if (!nrs_policy_compatible(svc, desc) ||
1179 unlikely(svc->srv_is_stopping))
1182 ptlrpc_service_for_each_part(svcpt, i, svc) {
1183 struct ptlrpc_nrs *nrs;
1186 nrs = nrs_svcpt2nrs(svcpt, hp);
1187 rc = nrs_policy_register(nrs, desc);
1189 CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
1190 desc->pd_name, svcpt->scp_cpt,
1191 svcpt->scp_service->srv_name, rc);
1193 rc2 = nrs_policy_unregister_locked(desc);
1195 * Should not fail at this point
1198 mutex_unlock(&ptlrpc_all_services_mutex);
1203 if (!hp && nrs_svc_has_hp(svc)) {
1210 * No need to take a reference to other modules here, as we
1211 * will be calling from the module's init() function.
1213 if (desc->pd_ops->op_lprocfs_init) {
1214 rc = desc->pd_ops->op_lprocfs_init(svc);
1216 rc2 = nrs_policy_unregister_locked(desc);
1218 * Should not fail at this point
1221 mutex_unlock(&ptlrpc_all_services_mutex);
1228 mutex_unlock(&ptlrpc_all_services_mutex);
1230 list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
1232 mutex_unlock(&nrs_core.nrs_mutex);
1238 * Setup NRS heads on all service partitions of service \a svc, and register
1239 * all compatible policies on those NRS heads.
1241 * To be called from within ptl
1242 * \param[in] svc the service to setup
1244 * \retval -ve error, the calling logic should eventually call
1245 * ptlrpc_service_nrs_cleanup() to undo any work performed
1248 * \see ptlrpc_register_service()
1249 * \see ptlrpc_service_nrs_cleanup()
1251 int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
1253 struct ptlrpc_service_part *svcpt;
1254 const struct ptlrpc_nrs_pol_desc *desc;
1258 mutex_lock(&nrs_core.nrs_mutex);
1261 * Initialize NRS heads on all service CPTs.
1263 ptlrpc_service_for_each_part(svcpt, i, svc) {
1264 rc = nrs_svcpt_setup_locked(svcpt);
1270 * Set up lprocfs interfaces for all supported policies for the
1273 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1274 if (!nrs_policy_compatible(svc, desc))
1277 if (desc->pd_ops->op_lprocfs_init) {
1278 rc = desc->pd_ops->op_lprocfs_init(svc);
1286 mutex_unlock(&nrs_core.nrs_mutex);
1292 * Unregisters all policies on all service partitions of service \a svc.
1294 * \param[in] svc the PTLRPC service to unregister
1296 void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
1298 struct ptlrpc_service_part *svcpt;
1299 const struct ptlrpc_nrs_pol_desc *desc;
1302 mutex_lock(&nrs_core.nrs_mutex);
1305 * Clean up NRS heads on all service partitions
1307 ptlrpc_service_for_each_part(svcpt, i, svc)
1308 nrs_svcpt_cleanup_locked(svcpt);
1311 * Clean up lprocfs interfaces for all supported policies for the
1314 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1315 if (!nrs_policy_compatible(svc, desc))
1318 if (desc->pd_ops->op_lprocfs_fini)
1319 desc->pd_ops->op_lprocfs_fini(svc);
1322 mutex_unlock(&nrs_core.nrs_mutex);
1326 * Obtains NRS head resources for request \a req.
1328 * These could be either on the regular or HP NRS head of \a svcpt; resources
1329 * taken on the regular head can later be swapped for HP head resources by
1330 * ldlm_lock_reorder_req().
1332 * \param[in] svcpt the service partition
1333 * \param[in] req the request
1334 * \param[in] hp which NRS head of \a svcpt to use
1336 void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
1337 struct ptlrpc_request *req, bool hp)
1339 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1341 memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
1342 nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
1346 * It is fine to access \e nr_initialized without locking as there is
1347 * no contention at this early stage.
1349 req->rq_nrq.nr_initialized = 1;
1353 * Releases resources for a request; is called after the request has been
1356 * \param[in] req the request
1358 * \see ptlrpc_server_finish_request()
1360 void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
1362 if (req->rq_nrq.nr_initialized) {
1363 nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
1364 /* no protection on bit nr_initialized because no
1365 * contention at this late stage
1367 req->rq_nrq.nr_finalized = 1;
1371 void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
1373 if (req->rq_nrq.nr_started)
1374 nrs_request_stop(&req->rq_nrq);
1378 * Enqueues request \a req on either the regular or high-priority NRS head
1379 * of service partition \a svcpt.
1381 * \param[in] svcpt the service partition
1382 * \param[in] req the request to be enqueued
1383 * \param[in] hp whether to enqueue the request on the regular or
1384 * high-priority NRS head.
1386 void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
1387 struct ptlrpc_request *req, bool hp)
1389 spin_lock(&svcpt->scp_req_lock);
1392 ptlrpc_nrs_hpreq_add_nolock(req);
1394 ptlrpc_nrs_req_add_nolock(req);
1396 spin_unlock(&svcpt->scp_req_lock);
1399 static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
1401 LASSERT(policy->pol_nrs->nrs_req_queued > 0);
1402 LASSERT(policy->pol_req_queued > 0);
1404 policy->pol_nrs->nrs_req_queued--;
1405 policy->pol_req_queued--;
1408 * If the policy has no more requests queued, remove it from
1409 * ptlrpc_nrs::nrs_policy_queued.
1411 if (unlikely(policy->pol_req_queued == 0)) {
1412 list_del_init(&policy->pol_list_queued);
1415 * If there are other policies with queued requests, move the
1416 * current policy to the end so that we can round robin over
1417 * all policies and drain the requests.
1419 } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
1420 LASSERT(policy->pol_req_queued <
1421 policy->pol_nrs->nrs_req_queued);
1423 list_move_tail(&policy->pol_list_queued,
1424 &policy->pol_nrs->nrs_policy_queued);
1429 * Obtains a request for handling from an NRS head of service partition
1432 * \param[in] svcpt the service partition
1433 * \param[in] hp whether to obtain a request from the regular or
1434 * high-priority NRS head.
1435 * \param[in] peek when set, signifies that we just want to examine the
1436 * request, and not handle it, so the request is not removed
1438 * \param[in] force when set, it will force a policy to return a request if it
1441 * \retval the request to be handled
1442 * \retval NULL the head has no requests to serve
1444 struct ptlrpc_request *
1445 ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
1446 bool peek, bool force)
1448 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1449 struct ptlrpc_nrs_policy *policy;
1450 struct ptlrpc_nrs_request *nrq;
1453 * Always try to drain requests from all NRS polices even if they are
1454 * inactive, because the user can change policy status at runtime.
1456 list_for_each_entry(policy, &nrs->nrs_policy_queued, pol_list_queued) {
1457 nrq = nrs_request_get(policy, peek, force);
1459 if (likely(!peek)) {
1460 nrq->nr_started = 1;
1462 policy->pol_req_started++;
1463 policy->pol_nrs->nrs_req_started++;
1465 nrs_request_removed(policy);
1468 return container_of(nrq, struct ptlrpc_request, rq_nrq);
1476 * Returns whether there are any requests currently enqueued on any of the
1477 * policies of service partition's \a svcpt NRS head specified by \a hp. Should
1478 * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
1481 * \param[in] svcpt the service partition to enquire.
1482 * \param[in] hp whether the regular or high-priority NRS head is to be
1485 * \retval false the indicated NRS head has no enqueued requests.
1486 * \retval true the indicated NRS head has some enqueued requests.
1488 bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
1490 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1492 return nrs->nrs_req_queued > 0;
1496 * Carries out a control operation \a opc on the policy identified by the
1497 * human-readable \a name, on either all partitions, or only on the first
1498 * partition of service \a svc.
1500 * \param[in] svc the service the policy belongs to.
1501 * \param[in] queue whether to carry out the command on the policy which
1502 * belongs to the regular, high-priority, or both NRS
1503 * heads of service partitions of \a svc.
1504 * \param[in] name the policy to act upon, by human-readable name
1505 * \param[in] opc the opcode of the operation to carry out
1506 * \param[in] single when set, the operation will only be carried out on the
1507 * NRS heads of the first service partition of \a svc.
1508 * This is useful for some policies which e.g. share
1509 * identical values on the same parameters of different
1510 * service partitions; when reading these parameters via
1511 * lprocfs, these policies may just want to obtain and
1512 * print out the values from the first service partition.
1513 * Storing these values centrally elsewhere then could be
1514 * another solution for this.
1515 * \param[in,out] arg can be used as a generic in/out buffer between control
1516 * operations and the user environment.
1518 *\retval -ve error condition
1519 *\retval 0 operation was carried out successfully
1521 int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
1522 enum ptlrpc_nrs_queue_type queue, char *name,
1523 enum ptlrpc_nrs_ctl opc, bool single, void *arg)
1525 struct ptlrpc_service_part *svcpt;
1529 LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
1531 if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
1534 ptlrpc_service_for_each_part(svcpt, i, svc) {
1535 if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
1536 rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
1538 if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
1543 if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
1545 * XXX: We could optionally check for
1546 * nrs_svc_has_hp(svc) here, and return an error if it
1547 * is false. Right now we rely on the policies' lprocfs
1548 * handlers that call the present function to make this
1549 * check; if they fail to do so, they might hit the
1550 * assertion inside nrs_svcpt2nrs() below.
1552 rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
1554 if (rc != 0 || single)
1563 * Adds all policies that ship with the ptlrpc module, to NRS core's list of
1564 * policies \e nrs_core.nrs_policies.
1566 * \retval 0 all policies have been registered successfully
1569 int ptlrpc_nrs_init(void)
1573 mutex_init(&nrs_core.nrs_mutex);
1574 INIT_LIST_HEAD(&nrs_core.nrs_policies);
1576 rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
1583 * Since no PTLRPC services have been started at this point, all we need
1584 * to do for cleanup is to free the descriptors.
/**
 * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
 * policy descriptors.
 *
 * Since all PTLRPC services are stopped at this point, there are no more
 * instances of any policies, because each service will have stopped its policy
 * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
 * policy descriptors.
 */
1600 void ptlrpc_nrs_fini(void)
1602 struct ptlrpc_nrs_pol_desc *desc;
1603 struct ptlrpc_nrs_pol_desc *tmp;
1605 list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies, pd_list) {
1606 list_del_init(&desc->pd_list);