4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details.
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see
17 * http://www.gnu.org/licenses/gpl-2.0.html
22 * Copyright (c) 2014, Intel Corporation.
24 * Copyright 2012 Xyratex Technology Limited
28 * Network Request Scheduler (NRS)
36 * \defgroup nrs Network Request Scheduler
39 struct ptlrpc_nrs_policy;
40 struct ptlrpc_nrs_resource;
41 struct ptlrpc_nrs_request;
44 * NRS control operations.
46 * These are common for all policies.
52 PTLRPC_NRS_CTL_INVALID,
54 * Activate the policy.
58 * Reserved for multiple primary policies, which may be a possibility
63 * Policies can start using opcodes from this value and onwards for
64 * their own purposes; the assigned value itself is arbitrary.
66 PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20,
70 * NRS policy operations.
72 * These determine the behaviour of a policy, and are called in response to
75 struct ptlrpc_nrs_pol_ops {
77 * Called during policy registration; this operation is optional.
79 * \param[in,out] policy The policy being initialized
81 int (*op_policy_init)(struct ptlrpc_nrs_policy *policy);
83 * Called during policy unregistration; this operation is optional.
85 * \param[in,out] policy The policy being unregistered/finalized
87 void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
89 * Called when activating a policy via lprocfs; policies allocate and
90 * initialize their resources here; this operation is optional.
92 * \param[in,out] policy The policy being started
94 * \see nrs_policy_start_locked()
96 int (*op_policy_start)(struct ptlrpc_nrs_policy *policy);
98 * Called when deactivating a policy via lprocfs; policies deallocate
99 * their resources here; this operation is optional
101 * \param[in,out] policy The policy being stopped
103 * \see nrs_policy_stop0()
105 void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
107 * Used for policy-specific operations; i.e. not generic ones like
108 * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
109 * to an ioctl; this operation is optional.
111 * \param[in,out] policy The policy carrying out operation \a opc
112 * \param[in] opc The command operation being carried out
113 * \param[in,out] arg A generic buffer for communication between the
114 * user and the control operation
119 * \see ptlrpc_nrs_policy_control()
121 int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
122 enum ptlrpc_nrs_ctl opc, void *arg);
125 * Called when obtaining references to the resources of the resource
126 * hierarchy for a request that has arrived for handling at the PTLRPC
127 * service. Policies should return -ve for requests they do not wish
128 * to handle. This operation is mandatory.
130 * \param[in,out] policy The policy we're getting resources for.
131 * \param[in,out] nrq The request we are getting resources for.
132 * \param[in] parent The parent resource of the resource being
133 * requested; set to NULL if none.
134 * \param[out] resp The resource is to be returned here; the
135 * fallback policy in an NRS head should
136 * \e always return a non-NULL pointer value.
137 * \param[in] moving_req When set, signifies that this is an attempt
138 * to obtain resources for a request being moved
139 * to the high-priority NRS head by
140 * ldlm_lock_reorder_req().
141 * This implies two things:
142 * 1. We are under obd_export::exp_rpc_lock and
143 * so should not sleep.
144 * 2. We should not repeat non-idempotent operations,
145 * and may skip idempotent ones, that
146 * were carried out when resources were first
147 * taken for the request when it was initialized
148 * in ptlrpc_nrs_req_initialize().
150 * \retval 0, +ve The level of the returned resource in the resource
151 * hierarchy; currently only 0 (for a non-leaf resource)
152 * and 1 (for a leaf resource) are supported by the
156 * \see ptlrpc_nrs_req_initialize()
157 * \see ptlrpc_nrs_hpreq_add_nolock()
158 * \see ptlrpc_nrs_req_hp_move()
160 int (*op_res_get)(struct ptlrpc_nrs_policy *policy,
161 struct ptlrpc_nrs_request *nrq,
162 const struct ptlrpc_nrs_resource *parent,
163 struct ptlrpc_nrs_resource **resp,
166 * Called when releasing references taken for resources in the resource
167 * hierarchy for the request; this operation is optional.
169 * \param[in,out] policy The policy the resource belongs to
170 * \param[in] res The resource to be freed
172 * \see ptlrpc_nrs_req_finalize()
173 * \see ptlrpc_nrs_hpreq_add_nolock()
174 * \see ptlrpc_nrs_req_hp_move()
176 void (*op_res_put)(struct ptlrpc_nrs_policy *policy,
177 const struct ptlrpc_nrs_resource *res);
180 * Obtains a request for handling from the policy, and optionally
181 * removes the request from the policy; this operation is mandatory.
183 * \param[in,out] policy The policy to poll
184 * \param[in] peek When set, signifies that we just want to
185 * examine the request, and not handle it, so the
186 * request is not removed from the policy.
187 * \param[in] force When set, it will force a policy to return a
188 * request if it has one queued.
190 * \retval NULL No request available for handling
191 * \retval valid-pointer The request polled for handling
193 * \see ptlrpc_nrs_req_get_nolock()
195 struct ptlrpc_nrs_request *
196 (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
199 * Called when attempting to add a request to a policy for later
200 * handling; this operation is mandatory.
202 * \param[in,out] policy The policy on which to enqueue \a nrq
203 * \param[in,out] nrq The request to enqueue
208 * \see ptlrpc_nrs_req_add_nolock()
210 int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
211 struct ptlrpc_nrs_request *nrq);
213 * Removes a request from the policy's set of pending requests. Normally
214 * called after a request has been polled successfully from the policy
215 * for handling; this operation is mandatory.
217 * \param[in,out] policy The policy the request \a nrq belongs to
218 * \param[in,out] nrq The request to dequeue
220 * \see ptlrpc_nrs_req_del_nolock()
222 void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
223 struct ptlrpc_nrs_request *nrq);
225 * Called after the request has been carried out. Could be used for
226 * job/resource control; this operation is optional.
228 * \param[in,out] policy The policy which is stopping to handle request
230 * \param[in,out] nrq The request
232 * \pre assert_spin_locked(&svcpt->scp_req_lock)
234 * \see ptlrpc_nrs_req_stop_nolock()
236 void (*op_req_stop)(struct ptlrpc_nrs_policy *policy,
237 struct ptlrpc_nrs_request *nrq);
239 * Registers the policy's lprocfs interface with a PTLRPC service.
241 * \param[in] svc The service
246 int (*op_lprocfs_init)(struct ptlrpc_service *svc);
248 * Unregisters the policy's lprocfs interface with a PTLRPC service.
250 * In cases of failed policy registration in
251 * \e ptlrpc_nrs_policy_register(), this function may be called for a
252 * service which has not registered the policy successfully, so
253 * implementations of this method should make sure their operations are
254 * safe in such cases.
256 * \param[in] svc The service
258 void (*op_lprocfs_fini)(struct ptlrpc_service *svc);
264 enum nrs_policy_flags {
266 * Fallback policy, use this flag only on a single supported policy per
267 * service. The flag cannot be used on policies that use
268 * \e PTLRPC_NRS_FL_REG_EXTERN
270 PTLRPC_NRS_FL_FALLBACK = BIT(0),
272 * Start policy immediately after registering.
274 PTLRPC_NRS_FL_REG_START = BIT(1),
276 * This is a policy registering from a module different to the one NRS
277 * core ships in (currently ptlrpc).
279 PTLRPC_NRS_FL_REG_EXTERN = BIT(2),
285 * Denotes whether an NRS instance is for handling normal or high-priority
286 * RPCs, or whether an operation pertains to one or both of the NRS instances
289 enum ptlrpc_nrs_queue_type {
290 PTLRPC_NRS_QUEUE_REG = BIT(0),
291 PTLRPC_NRS_QUEUE_HP = BIT(1),
292 PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP)
298 * A PTLRPC service has at least one NRS head instance for handling normal
299 * priority RPCs, and may optionally have a second NRS head instance for
300 * handling high-priority RPCs. Each NRS head maintains a list of available
301 * policies, of which one and only one policy is acting as the fallback policy,
302 * and optionally a different policy may be acting as the primary policy. For
303 * all RPCs handled by this NRS head instance, NRS core will first attempt to
304 * enqueue the RPC using the primary policy (if any). The fallback policy is
305 * used in the following cases:
306 * - when there was no primary policy in the
307 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request
309 * - when the primary policy that was at the
310 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the
311 * RPC was initialized, denoted it did not wish, or for some other reason was
312 * not able to handle the request, by returning a non-valid NRS resource
314 * - when the primary policy that was at the
315 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the
316 * RPC was initialized, fails later during the request enqueueing stage.
318 * \see nrs_resource_get_safe()
319 * \see nrs_request_enqueue()
323 /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */
325 * List of registered policies
327 struct list_head nrs_policy_list;
329 * List of policies with queued requests. Policies that have any
330 * outstanding requests are queued here, and this list is queried
331 * in a round-robin manner from NRS core when obtaining a request
332 * for handling. This ensures that requests from policies that at some
333 * point transition away from the
334 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained.
336 struct list_head nrs_policy_queued;
338 * Service partition for this NRS head
340 struct ptlrpc_service_part *nrs_svcpt;
342 * Primary policy, which is the preferred policy for handling RPCs
344 struct ptlrpc_nrs_policy *nrs_policy_primary;
346 * Fallback policy, which is the backup policy for handling RPCs
348 struct ptlrpc_nrs_policy *nrs_policy_fallback;
350 * This NRS head handles either HP or regular requests
352 enum ptlrpc_nrs_queue_type nrs_queue_type;
354 * # queued requests from all policies in this NRS head
356 unsigned long nrs_req_queued;
358 * # scheduled requests from all policies in this NRS head
360 unsigned long nrs_req_started;
362 * # policies on this NRS
364 unsigned int nrs_num_pols;
366 * This NRS head is in progress of starting a policy
368 unsigned int nrs_policy_starting:1;
370 * In progress of shutting down the whole NRS head; used during
373 unsigned int nrs_stopping:1;
375 * NRS policy is throttling request
377 unsigned int nrs_throttling:1;
380 #define NRS_POL_NAME_MAX 16
381 #define NRS_POL_ARG_MAX 16
383 struct ptlrpc_nrs_pol_desc;
386 * Service compatibility predicate; this determines whether a policy is adequate
387 * for handling RPCs of a particular PTLRPC service.
389 * XXX:This should give the same result during policy registration and
390 * unregistration, and for all partitions of a service; so the result should not
391 * depend on temporal service or other properties, that may influence the
394 typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc,
395 const struct ptlrpc_nrs_pol_desc *desc);
397 struct ptlrpc_nrs_pol_conf {
399 * Human-readable policy name
401 char nc_name[NRS_POL_NAME_MAX];
403 * NRS operations for this policy
405 const struct ptlrpc_nrs_pol_ops *nc_ops;
407 * Service compatibility predicate
409 nrs_pol_desc_compat_t nc_compat;
411 * Set for policies that support a single ptlrpc service, i.e. ones that
412 * have \a nc_compat set to nrs_policy_compat_one(). The variable value
413 * depicts the name of the single service that such policies are
416 const char *nc_compat_svc_name;
418 * Owner module for this policy descriptor; policies registering from a
419 * different module to the one the NRS framework is held within
420 * (currently ptlrpc), should set this field to THIS_MODULE.
422 struct module *nc_owner;
424 * Policy registration flags; a bitmask of \e nrs_policy_flags
426 unsigned int nc_flags;
430 * NRS policy registering descriptor
432 * Is used to hold a description of a policy that can be passed to NRS core in
433 * order to register the policy with NRS heads in different PTLRPC services.
435 struct ptlrpc_nrs_pol_desc {
437 * Human-readable policy name
439 char pd_name[NRS_POL_NAME_MAX];
441 * Link into nrs_core::nrs_policies
443 struct list_head pd_list;
445 * NRS operations for this policy
447 const struct ptlrpc_nrs_pol_ops *pd_ops;
449 * Service compatibility predicate
451 nrs_pol_desc_compat_t pd_compat;
453 * Set for policies that are compatible with only one PTLRPC service.
455 * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name
457 const char *pd_compat_svc_name;
459 * Owner module for this policy descriptor.
461 * We need to hold a reference to the module whenever we might make use
462 * of any of the module's contents, i.e.
463 * - If one or more instances of the policy are at a state where they
464 * might be handling a request, i.e.
465 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
466 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to
467 * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference
468 * is taken on the module when
469 * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it
470 * becomes 0, so that we hold only one reference to the module maximum
473 * We do not need to hold a reference to the module, even though we
474 * might use code and data from the module, in the following cases:
475 * - During external policy registration, because this should happen in
476 * the module's init() function, in which case the module is safe from
477 * removal because a reference is being held on the module by the
478 * kernel, and iirc kmod (and I guess module-init-tools also) will
479 * serialize any racing processes properly anyway.
480 * - During external policy unregistration, because this should happen
481 * in a module's exit() function, and any attempts to start a policy
482 * instance would need to take a reference on the module, and this is
483 * not possible once we have reached the point where the exit()
485 * - During service registration and unregistration, as service setup
486 * and cleanup, and policy registration, unregistration and policy
487 * instance starting, are serialized by \e nrs_core::nrs_mutex, so
488 * as long as users adhere to the convention of registering policies
489 * in init() and unregistering them in module exit() functions, there
490 * should not be a race between these operations.
491 * - During any policy-specific lprocfs operations, because a reference
492 * is held by the kernel on a proc entry that has been entered by a
493 * syscall, so as long as proc entries are removed during
494 * unregistration time, then unregistration and lprocfs operations
495 * will be properly serialized.
497 struct module *pd_owner;
499 * Bitmask of \e nrs_policy_flags
501 unsigned int pd_flags;
503 * # of references on this descriptor
511 * Policies transition from one state to the other during their lifetime
513 enum ptlrpc_nrs_pol_state {
515 * Not a valid policy state.
517 NRS_POL_STATE_INVALID,
519 * Policies are at this state either at the start of their life, or
520 * transition here when the user selects a different policy to act
521 * as the primary one.
523 NRS_POL_STATE_STOPPED,
525 * Policy is in progress of stopping
527 NRS_POL_STATE_STOPPING,
529 * Policy is in progress of starting
531 NRS_POL_STATE_STARTING,
533 * A policy is in this state in two cases:
534 * - it is the fallback policy, which is always in this state.
535 * - it has been activated by the user; i.e. it is the primary policy,
537 NRS_POL_STATE_STARTED,
541 * NRS policy information
543 * Used for obtaining information for the status of a policy via lprocfs
545 struct ptlrpc_nrs_pol_info {
549 char pi_name[NRS_POL_NAME_MAX];
553 char pi_arg[NRS_POL_ARG_MAX];
555 * Current policy state
557 enum ptlrpc_nrs_pol_state pi_state;
559 * # RPCs enqueued for later dispatching by the policy
563 * # RPCs started for dispatch by the policy
567 * Is this a fallback policy?
569 unsigned pi_fallback:1;
575 * There is one instance of this for each policy in each NRS head of each
576 * PTLRPC service partition.
578 struct ptlrpc_nrs_policy {
580 * Linkage into the NRS head's list of policies,
581 * ptlrpc_nrs::nrs_policy_list
583 struct list_head pol_list;
585 * Linkage into the NRS head's list of policies with enqueued
586 * requests, ptlrpc_nrs::nrs_policy_queued
588 struct list_head pol_list_queued;
590 * Current state of this policy
592 enum ptlrpc_nrs_pol_state pol_state;
594 * Bitmask of nrs_policy_flags
596 unsigned int pol_flags;
598 * # RPCs enqueued for later dispatching by the policy
602 * # RPCs started for dispatch by the policy
604 long pol_req_started;
606 * Usage Reference count taken on the policy instance
610 * Human-readable policy argument
612 char pol_arg[NRS_POL_ARG_MAX];
614 * The NRS head this policy has been created at
616 struct ptlrpc_nrs *pol_nrs;
618 * Private policy data; varies by policy type
622 * Policy descriptor for this policy instance.
624 struct ptlrpc_nrs_pol_desc *pol_desc;
630 * Resources are embedded into two types of NRS entities:
631 * - Inside NRS policies, in the policy's private data in
632 * ptlrpc_nrs_policy::pol_private
633 * - In objects that act as prime-level scheduling entities in different NRS
634 * policies; e.g. on a policy that performs round robin or similar order
635 * scheduling across client NIDs, there would be one NRS resource per unique
636 * client NID. On a policy which performs round robin scheduling across
637 * backend filesystem objects, there would be one resource associated with
638 * each of the backend filesystem objects partaking in the scheduling
639 * performed by the policy.
641 * NRS resources share a parent-child relationship, in which resources embedded
642 * in policy instances are the parent entities, with all scheduling entities
643 * a policy schedules across being the children, thus forming a simple resource
644 * hierarchy. This hierarchy may be extended with one or more levels in the
645 * future if the ability to have more than one primary policy is added.
647 * Upon request initialization, references to the then active NRS policies are
648 * taken and used to later handle the dispatching of the request with one of
651 * \see nrs_resource_get_safe()
652 * \see ptlrpc_nrs_req_add()
654 struct ptlrpc_nrs_resource {
656 * This NRS resource's parent; is NULL for resources embedded in NRS
657 * policy instances; i.e. those are top-level ones.
659 struct ptlrpc_nrs_resource *res_parent;
661 * The policy associated with this resource.
663 struct ptlrpc_nrs_policy *res_policy;
672 #include <lustre_nrs_fifo.h>
677 * Instances of this object exist embedded within ptlrpc_request; the main
678 * purpose of this object is to hold references to the request's resources
679 * for the lifetime of the request, and to hold properties that policies use
680 * for determining the request's scheduling priority.
682 struct ptlrpc_nrs_request {
684 * The request's resource hierarchy.
686 struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX];
688 * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the
689 * policy that was used to enqueue the request.
691 * \see nrs_request_enqueue()
693 unsigned int nr_res_idx;
694 unsigned int nr_initialized:1;
695 unsigned int nr_enqueued:1;
696 unsigned int nr_started:1;
697 unsigned int nr_finalized:1;
700 * Policy-specific fields, used for determining a request's scheduling
701 * priority, and other supporting functionality.
705 * Fields for the FIFO policy
707 struct nrs_fifo_req fifo;
710 * Externally-registering policies may want to use this to allocate
711 * their own request properties.