4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
33 #define DEBUG_SUBSYSTEM S_LOV
35 #include <linux/libcfs/libcfs.h>
37 #include <obd_class.h>
38 #include <uapi/linux/lustre/lustre_idl.h>
39 #include "lov_internal.h"
/* Put a request set into its initial, empty state.
 *
 * set: request set to initialize
 *
 * Both completion counters are zeroed and set_list becomes an empty list
 * head, ready for lov_set_add_req() to queue requests on it.
 */
41 static void lov_init_set(struct lov_request_set *set)
/* nothing has completed, and nothing has succeeded, yet */
44 atomic_set(&set->set_completes, 0);
45 atomic_set(&set->set_success, 0);
/* empty queue of struct lov_request, linked through rq_link */
46 INIT_LIST_HEAD(&set->set_list);
/* Tear down the requests queued on a request set.
 *
 * set: request set whose set_list is drained
 *
 * Every struct lov_request found on set->set_list is unlinked and the
 * struct obd_statfs buffer hanging off it (rq_oi.oi_osfs, allocated in
 * lov_prep_statfs_set()) is freed.
 */
49 static void lov_finish_set(struct lov_request_set *set)
51 struct list_head *pos, *n;
/* the _safe iterator is required because entries are removed while walking */
54 list_for_each_safe(pos, n, &set->set_list) {
55 struct lov_request *req = list_entry(pos,
58 list_del_init(&req->rq_link);
/* per-request statfs reply buffer */
60 kfree(req->rq_oi.oi_osfs);
/* Account for one finished request in its set.
 *
 * set: request set being updated
 * req: the request that finished (not referenced by the statements shown)
 * rc:  completion status of that request
 *
 * set_completes counts finished requests and set_success counts the
 * successful ones.  NOTE(review): set_success is presumably bumped only
 * when rc reports success -- confirm the guarding condition.
 */
66 static void lov_update_set(struct lov_request_set *set,
67 struct lov_request *req, int rc)
69 atomic_inc(&set->set_completes);
71 atomic_inc(&set->set_success);
/* Queue a request on a request set.
 *
 * req: request to queue; linked through its rq_link member
 * set: set that receives the request
 *
 * The request is appended at the tail of set->set_list, so the list keeps
 * the order in which requests were added.
 */
74 static void lov_set_add_req(struct lov_request *req,
75 struct lov_request_set *set)
77 list_add_tail(&req->rq_link, &set->set_list);
/* Tell whether waiting for target idx to connect is pointless.
 *
 * lov: LOV device whose target table is consulted
 * idx: index into lov->lov_tgts
 *
 * The result is computed under lov_lock and is true when the slot is
 * empty, when the target is already active, or when imp_connect_tried is
 * set on the client import obtained from tgt->ltd_exp.  It serves as the
 * wait condition of lov_check_and_wait_active().
 */
82 static int lov_check_set(struct lov_obd *lov, int idx)
85 struct lov_tgt_desc *tgt;
/* the target table is only read while lov_lock is held */
87 mutex_lock(&lov->lov_lock);
88 tgt = lov->lov_tgts[idx];
89 rc = !tgt || tgt->ltd_active ||
91 class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried);
92 mutex_unlock(&lov->lov_lock);
97 /* Check if the OSC connection exists and is active.
98 * If the OSC has not yet had a chance to connect to the OST the first time,
99 * wait once for it to connect instead of returning an error.
100 */
/* lov:     LOV device whose target table is consulted
 * ost_idx: index of the target in lov->lov_tgts
 *
 * The target slot is inspected under lov_lock.  Three cases are tested
 * there: the slot is empty, the target is already active, or a connect has
 * already been tried on the import behind tgt->ltd_exp.  Presumably only
 * when none of them applies, lov_lock is released and the thread sleeps
 * until lov_check_set() reports true or the timeout expires.
 *
 * NOTE(review): the value returned for each case is not evident from the
 * statements shown here, and the one caller visible in this file ignores
 * it -- confirm before relying on it.
 */
101 static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
103 wait_queue_head_t waitq;
104 struct l_wait_info lwi;
105 struct lov_tgt_desc *tgt;
108 mutex_lock(&lov->lov_lock);
110 tgt = lov->lov_tgts[ost_idx];
/* no target configured at this index */
112 if (unlikely(!tgt)) {
/* common case: the target is already usable */
117 if (likely(tgt->ltd_active)) {
/* a connect was already attempted on this import */
122 if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried) {
/* lov_check_set(), used as the wait condition below, takes lov_lock itself */
127 mutex_unlock(&lov->lov_lock);
/* waitq lives on this stack frame only; the condition is presumably
 * re-evaluated on every interval tick (1 second) for up to obd_timeout
 * seconds -- TODO confirm against the l_wait_event() definition
 */
129 init_waitqueue_head(&waitq);
130 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(obd_timeout),
131 cfs_time_seconds(1), NULL, NULL);
133 rc = l_wait_event(waitq, lov_check_set(lov, ost_idx), &lwi);
140 mutex_unlock(&lov->lov_lock);
/* All-ones 64-bit value; totals pinned to it are treated as saturated. */
144 #define LOV_U64_MAX ((__u64)~0ULL)
/* Overflow-aware accumulation of add into tot: when the unsigned sum would
 * wrap around (tot + add < tot), tot is pinned to LOV_U64_MAX; presumably
 * the plain sum is stored otherwise (TODO confirm).  Both arguments are
 * evaluated more than once, so they must be free of side effects.
 */
145 #define LOV_SUM_MAX(tot, add) \
147 if ((tot) + (add) < (tot)) \
148 (tot) = LOV_U64_MAX; \
/* Finalize an aggregated statfs result and cache it on the LOV device.
 *
 * obd:  LOV device; obd->u.lov supplies the expected stripe count and the
 *       result is stored in obd->obd_osfs
 * osfs: aggregated totals; os_files and os_ffree are divided in place
 *
 * NOTE(review): the parameter list continues past the part shown here; the
 * caller visible in this file passes the set's success count as a third
 * argument.
 */
153 static int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,
157 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
/* object counts were summed over all targets; scale them by the expected
 * stripe count, leaving totals that saturated at LOV_U64_MAX untouched
 */
159 if (osfs->os_files != LOV_U64_MAX)
160 lov_do_div64(osfs->os_files, expected_stripes);
161 if (osfs->os_ffree != LOV_U64_MAX)
162 lov_do_div64(osfs->os_ffree, expected_stripes);
/* publish the result and its timestamp together under obd_osfs_lock */
164 spin_lock(&obd->obd_osfs_lock);
165 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
166 obd->obd_osfs_age = cfs_time_current_64();
167 spin_unlock(&obd->obd_osfs_lock);
/* Complete a statfs request set.
 *
 * set: request set built by lov_prep_statfs_set()
 *
 * If at least one request of the set has completed, the aggregate held in
 * set->set_oi->oi_osfs is finalized through lov_fini_statfs(), which is
 * handed the number of successful requests.
 *
 * NOTE(review): how the set itself is released, and what is returned when
 * nothing has completed, is not evident from the statements shown --
 * confirm.
 */
174 int lov_fini_statfs_set(struct lov_request_set *set)
181 if (atomic_read(&set->set_completes)) {
182 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
183 atomic_read(&set->set_success));
/* Fold one target's statfs reply into the running LOV-wide totals.
 *
 * osfs:    aggregate being accumulated; updated in place
 * lov_sfs: reply from a single target; its block counters may be rescaled
 *          in place when the block sizes differ
 *
 * A wholesale memcpy() of lov_sfs into osfs seeds the aggregate --
 * presumably for the first successful reply only (TODO confirm the
 * guarding condition).  For later replies the block counters (os_bfree,
 * os_bavail, os_blocks) are brought to a common block size by
 * right-shifting whichever side uses the smaller os_bsize, then added
 * together.  os_files and os_ffree are combined with LOV_SUM_MAX(), which
 * pins a total to LOV_U64_MAX when the sum would wrap.
 *
 * NOTE(review): the parameter list continues past the part shown here; the
 * caller visible in this file passes a success count as a third argument.
 */
191 static void lov_update_statfs(struct obd_statfs *osfs,
192 struct obd_statfs *lov_sfs,
/* shift: number of bits the block counters are rescaled by; it stays 0
 * when both block sizes are equal
 */
195 int shift = 0, quit = 0;
199 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
201 if (osfs->os_bsize != lov_sfs->os_bsize) {
202 /* assume all block sizes are always powers of 2 */
203 /* get the bits difference */
204 tmp = osfs->os_bsize | lov_sfs->os_bsize;
/* NOTE(review): the bound lets shift grow past 63 if the loop is never
 * left early; a shift that large on a 64-bit counter would be undefined,
 * so this relies on the power-of-2 assumption above -- confirm
 */
205 for (shift = 0; shift <= 64; ++shift) {
/* aggregate uses the smaller block size: adopt the larger one and rescale
 * the aggregate's block counters
 */
216 if (osfs->os_bsize < lov_sfs->os_bsize) {
217 osfs->os_bsize = lov_sfs->os_bsize;
219 osfs->os_bfree >>= shift;
220 osfs->os_bavail >>= shift;
221 osfs->os_blocks >>= shift;
/* otherwise, if the sizes differed, rescale the incoming reply instead */
222 } else if (shift != 0) {
223 lov_sfs->os_bfree >>= shift;
224 lov_sfs->os_bavail >>= shift;
225 lov_sfs->os_blocks >>= shift;
/* both sides now count blocks of the same size */
227 osfs->os_bfree += lov_sfs->os_bfree;
228 osfs->os_bavail += lov_sfs->os_bavail;
229 osfs->os_blocks += lov_sfs->os_blocks;
230 /* XXX not sure about this one - depends on policy.
231 * - could be minimum if we always stripe on all OBDs
232 * (but that would be wrong for any other policy,
233 * if one of the OBDs has no more objects left)
234 * - could be sum if we stripe whole objects
235 * - could be average, just to give a nice number
237 * To give a "reasonable" (if not wholly accurate)
238 * number, we divide the total number of free objects
239 * by expected stripe count (watch out for overflow).
241 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
242 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
246 /* The callback for osc_statfs_async that finalizes a request info when a
247 * response is received.
248 */
/* cookie: the struct obd_info embedded as rq_oi in a struct lov_request
 * rc:     completion status of that request; forwarded to lov_update_set()
 *
 * Steps visible in this function:
 *  - the owning request, its set, the LOV device and the two statfs
 *    buffers (set-wide aggregate and per-target reply) are derived from
 *    the cookie;
 *  - the request is accounted for through lov_update_set();
 *  - the target is looked up by lovreq->rq_idx and tested for being absent
 *    or inactive (what happens in that case is not shown here);
 *  - the reply is cached in the target device's obd_osfs under
 *    obd_osfs_lock, and obd_osfs_age is refreshed only when
 *    OBD_STATFS_FROM_CACHE is clear in oi_flags;
 *  - the reply is merged into the aggregate by lov_update_statfs().
 *
 * NOTE(review): the return value and the handling of a failing rc are not
 * evident from the statements shown -- confirm.
 */
249 static int cb_statfs_update(void *cookie, int rc)
251 struct obd_info *oinfo = cookie;
252 struct lov_request *lovreq;
253 struct lov_request_set *set;
254 struct obd_statfs *osfs, *lov_sfs;
256 struct lov_tgt_desc *tgt;
257 struct obd_device *lovobd, *tgtobd;
/* the obd_info handed to the callback is the rq_oi member of its request */
260 lovreq = container_of(oinfo, struct lov_request, rq_oi);
261 set = lovreq->rq_rqset;
262 lovobd = set->set_obd;
263 lov = &lovobd->u.lov;
/* osfs: aggregate shared by the whole set; lov_sfs: this target's reply */
264 osfs = set->set_oi->oi_osfs;
265 lov_sfs = oinfo->oi_osfs;
/* sampled before this request is accounted for, so it counts only the
 * requests that succeeded earlier
 */
266 success = atomic_read(&set->set_success);
267 /* XXX: the same is done in lov_update_common_set, however
268 * lovset->set_exp is not initialized.
270 lov_update_set(set, lovreq, rc);
275 tgt = lov->lov_tgts[lovreq->rq_idx];
276 if (!tgt || !tgt->ltd_active)
279 tgtobd = class_exp2obd(tgt->ltd_exp);
280 spin_lock(&tgtobd->obd_osfs_lock);
281 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
282 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
283 tgtobd->obd_osfs_age = cfs_time_current_64();
284 spin_unlock(&tgtobd->obd_osfs_lock);
287 lov_update_statfs(osfs, lov_sfs, success);
/* Build the set of per-target statfs requests for a LOV device.
 *
 * obd:    LOV device; its target table (obd->u.lov) is scanned
 * oinfo:  caller's request info; oi_flags is consulted for
 *         OBD_STATFS_NODELAY and copied into every per-target request
 * reqset: output parameter -- presumably receives the new set (the
 *         assignment is not among the statements shown; TODO confirm)
 *
 * For every index below lov->desc.ld_tgt_count the loop
 *  - logs and passes over slots that are empty, or inactive while
 *    OBD_STATFS_NODELAY is set;
 *  - logs and passes over targets without an export (ltd_exp is NULL);
 *  - calls lov_check_and_wait_active() for targets that are not active
 *    yet, ignoring its result;
 *  - allocates a struct lov_request and a reply buffer for it, installs
 *    cb_statfs_update() as the completion callback and queues the request
 *    with lov_set_add_req().
 * Afterwards a set that ended up with no requests is detected through
 * set->set_count, and lov_fini_statfs_set() is called on the set.
 *
 * NOTE(review): the error codes, the conditions guarding the final
 * lov_fini_statfs_set() call and the return value are not evident from the
 * statements shown -- confirm.
 */
293 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
294 struct lov_request_set **reqset)
296 struct lov_request_set *set;
297 struct lov_obd *lov = &obd->u.lov;
/* zero-filled, so counters and pointers of the new set start out as 0 */
300 set = kzalloc(sizeof(*set), GFP_NOFS);
308 /* We only get block data from the OBD */
309 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
310 struct lov_request *req;
/* empty slot, or inactive target while OBD_STATFS_NODELAY is requested */
312 if (!lov->lov_tgts[i] ||
313 (oinfo->oi_flags & OBD_STATFS_NODELAY &&
314 !lov->lov_tgts[i]->ltd_active)) {
315 CDEBUG(D_HA, "lov idx %d inactive\n", i);
319 /* skip targets that have been explicitly disabled by the
322 if (!lov->lov_tgts[i]->ltd_exp) {
323 CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
327 if (!lov->lov_tgts[i]->ltd_active)
328 lov_check_and_wait_active(lov, i);
330 req = kzalloc(sizeof(*req), GFP_NOFS);
336 req->rq_oi.oi_osfs = kzalloc(sizeof(*req->rq_oi.oi_osfs),
338 if (!req->rq_oi.oi_osfs) {
345 req->rq_oi.oi_cb_up = cb_statfs_update;
346 req->rq_oi.oi_flags = oinfo->oi_flags;
348 lov_set_add_req(req, set);
350 if (!set->set_count) {
357 lov_fini_statfs_set(set);