1 // SPDX-License-Identifier: GPL-2.0-or-later
5 * underlying calls for unlocking locks
7 * Copyright (C) 2004 Oracle. All rights reserved.
11 #include <linux/module.h>
13 #include <linux/types.h>
14 #include <linux/highmem.h>
15 #include <linux/init.h>
16 #include <linux/sysctl.h>
17 #include <linux/random.h>
18 #include <linux/blkdev.h>
19 #include <linux/socket.h>
20 #include <linux/inet.h>
21 #include <linux/spinlock.h>
22 #include <linux/delay.h>
24 #include "../cluster/heartbeat.h"
25 #include "../cluster/nodemanager.h"
26 #include "../cluster/tcp.h"
29 #include "dlmcommon.h"
31 #define MLOG_MASK_PREFIX ML_DLM
32 #include "../cluster/masklog.h"
/*
 * Action bits for the "actions" mask that dlm_get_cancel_actions() and
 * dlm_get_unlock_actions() fill in and dlmunlock_common() then carries out.
 */
/* Lock is going away.  dlmunlock_common() BUG_ONs unless this is combined
 * with DLM_UNLOCK_REMOVE_LOCK; the memory itself is left to the caller's
 * final dlm_lock_put. */
34 #define DLM_UNLOCK_FREE_LOCK 0x00000001
/* Tested at the end of dlmunlock_common(); presumably reported back through
 * *call_ast so the unlockast gets run -- TODO confirm. */
35 #define DLM_UNLOCK_CALL_AST 0x00000002
/* list_del_init() the lock from the queue it currently sits on. */
36 #define DLM_UNLOCK_REMOVE_LOCK 0x00000004
/* Append the lock to the tail of res->granted. */
37 #define DLM_UNLOCK_REGRANT_LOCK 0x00000008
/* Reset lock->ml.convert_type to LKM_IVMODE. */
38 #define DLM_UNLOCK_CLEAR_CONVERT_TYPE 0x00000010
/* Forward declarations: these three helpers are defined further down in
 * this file but are already called from dlmunlock_common(). */
41 static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
42 struct dlm_lock_resource *res,
43 struct dlm_lock *lock,
44 struct dlm_lockstatus *lksb,
46 static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
47 struct dlm_lock_resource *res,
48 struct dlm_lock *lock,
49 struct dlm_lockstatus *lksb,
52 static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
53 struct dlm_lock_resource *res,
54 struct dlm_lock *lock,
55 struct dlm_lockstatus *lksb,
61 * according to the spec:
62 * http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf
64 * flags & LKM_CANCEL != 0: must be converting or blocked
65 * flags & LKM_CANCEL == 0: must be granted
67 * So to unlock a converting lock, you must first cancel the
68 * convert (passing LKM_CANCEL in flags), then call the unlock
69 * again (with no LKM_CANCEL in flags).
76 * taken: res->spinlock and lock->spinlock taken and dropped
78 * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
79 * all callers should have taken an extra ref on lock coming in
/*
 * dlmunlock_common - shared body of dlmunlock_master() and dlmunlock_remote()
 *
 * dlm, res, lock, lksb: domain, lock resource, lock and lock status block
 *            the request applies to.
 * flags:     LKM_* request flags.  LKM_CANCEL selects cancel handling,
 *            LKM_VALBLK requests a final lvb update.
 * call_ast:  out parameter; presumably set when DLM_UNLOCK_CALL_AST ends up
 *            in the action mask -- TODO confirm.
 * The wrappers pass 1 (dlmunlock_master) or 0 (dlmunlock_remote) as the last
 * argument, which is used here as master_node.
 *
 * Outline of what the function does:
 *  1. Logs an error if the lock still has entries on lock->ast_list and
 *     this is not a cancel.
 *  2. Under res->spinlock, handles DLM_LOCK_RES_IN_PROGRESS (error log, or
 *     __dlm_wait_on_lockres()), then sets that flag and takes
 *     lock->spinlock.
 *  3. Produces DLM_RECOVERING / DLM_MIGRATING when the resource is in one
 *     of those states.
 *  4. Asks dlm_get_cancel_actions() or dlm_get_unlock_actions() which
 *     DLM_UNLOCK_* actions apply.
 *  5. For LKM_VALBLK, copies lksb->lvb into res->lvb and adds LKM_PUT_LVB
 *     to flags.
 *  6. Marks cancel_pending / unlock_pending, drops both spinlocks, calls
 *     dlm_send_remote_unlock_request(), retakes the locks and trims the
 *     action mask according to the returned status.
 *  7. Applies the list actions, clears DLM_LOCK_RES_IN_PROGRESS and
 *     sanity-checks convert_type against membership of res->converting.
 *  8. Waits for DLM_LOCK_RES_RECOVERING to clear.
 *  9. On DLM_NORMAL, clears DLM_LKSB_PUT_LVB and DLM_LKSB_GET_LVB in
 *     lksb->flags.
 *
 * NOTE(review): steps 5 and 6 look like alternatives selected by
 * master_node (local lvb update vs. network request) -- confirm against
 * the branch structure.
 */
81 static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
82 struct dlm_lock_resource *res,
83 struct dlm_lock *lock,
84 struct dlm_lockstatus *lksb,
85 int flags, int *call_ast,
88 enum dlm_status status;
92 int recovery_wait = 0;
94 mlog(0, "master_node = %d, valblk = %d\n", master_node,
/* NOTE(review): the two ownership assertions below contradict each other,
 * so they are expected to sit on opposite sides of a master_node test --
 * TODO confirm. */
98 BUG_ON(res->owner != dlm->node_num);
100 BUG_ON(res->owner == dlm->node_num);
102 spin_lock(&dlm->ast_lock);
103 /* We want to be sure that we're not freeing a lock
104 * that still has AST's pending... */
105 in_use = !list_empty(&lock->ast_list);
106 spin_unlock(&dlm->ast_lock);
107 if (in_use && !(flags & LKM_CANCEL)) {
108 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
109 "while waiting for an ast!", res->lockname.len,
114 spin_lock(&res->spinlock);
115 if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
116 if (master_node && !(flags & LKM_CANCEL)) {
117 mlog(ML_ERROR, "lockres in progress!\n");
118 spin_unlock(&res->spinlock);
121 /* ok for this to sleep if not in a network handler */
122 __dlm_wait_on_lockres(res);
123 res->state |= DLM_LOCK_RES_IN_PROGRESS;
125 spin_lock(&lock->spinlock);
127 if (res->state & DLM_LOCK_RES_RECOVERING) {
128 status = DLM_RECOVERING;
132 if (res->state & DLM_LOCK_RES_MIGRATING) {
133 status = DLM_MIGRATING;
137 /* see above for what the spec says about
138 * LKM_CANCEL and the lock queue state */
139 if (flags & LKM_CANCEL)
140 status = dlm_get_cancel_actions(dlm, res, lock, lksb, &actions);
142 status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
/* this test is false only for DLM_NORMAL, or for DLM_CANCELGRANT when
 * master_node is set */
144 if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node))
147 /* By now this has been masked out of cancel requests. */
148 if (flags & LKM_VALBLK) {
149 /* make the final update to the lvb */
151 memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
153 flags |= LKM_PUT_LVB; /* let the send function
159 /* drop locks and send message */
160 if (flags & LKM_CANCEL)
161 lock->cancel_pending = 1;
163 lock->unlock_pending = 1;
164 spin_unlock(&lock->spinlock);
165 spin_unlock(&res->spinlock);
166 status = dlm_send_remote_unlock_request(dlm, res, lock, lksb,
168 spin_lock(&res->spinlock);
169 spin_lock(&lock->spinlock);
170 /* if the master told us the lock was already granted,
171 * let the ast handle all of these actions */
172 if (status == DLM_CANCELGRANT) {
173 actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
174 DLM_UNLOCK_REGRANT_LOCK|
175 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
176 } else if (status == DLM_RECOVERING ||
177 status == DLM_MIGRATING ||
178 status == DLM_FORWARD ||
179 status == DLM_NOLOCKMGR
181 /* must clear the actions because this unlock
182 * is about to be retried. cannot free or do
183 * any list manipulation. */
184 mlog(0, "%s:%.*s: clearing actions, %s\n",
185 dlm->name, res->lockname.len,
187 status==DLM_RECOVERING?"recovering":
188 (status==DLM_MIGRATING?"migrating":
189 (status == DLM_FORWARD ? "forward" :
193 if (flags & LKM_CANCEL)
194 lock->cancel_pending = 0;
196 if (!lock->unlock_pending)
199 lock->unlock_pending = 0;
203 /* get an extra ref on lock. if we are just switching
204 * lists here, we don't want the lock to go away. */
207 if (actions & DLM_UNLOCK_REMOVE_LOCK) {
208 list_del_init(&lock->list);
211 if (actions & DLM_UNLOCK_REGRANT_LOCK) {
213 list_add_tail(&lock->list, &res->granted);
215 if (actions & DLM_UNLOCK_CLEAR_CONVERT_TYPE) {
216 mlog(0, "clearing convert_type at %smaster node\n",
217 master_node ? "" : "non-");
218 lock->ml.convert_type = LKM_IVMODE;
221 /* remove the extra ref on lock */
225 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
226 if (!dlm_lock_on_list(&res->converting, lock))
227 BUG_ON(lock->ml.convert_type != LKM_IVMODE);
229 BUG_ON(lock->ml.convert_type == LKM_IVMODE);
230 spin_unlock(&lock->spinlock);
231 spin_unlock(&res->spinlock);
235 spin_lock(&res->spinlock);
236 /* Unlock request will directly succeed after owner dies,
237 * and the lock is already removed from grant list. We have to
238 * wait for RECOVERING done or we miss the chance to purge it
239 * since the removal is much faster than RECOVERING proc.
241 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_RECOVERING);
242 spin_unlock(&res->spinlock);
245 /* let the caller's final dlm_lock_put handle the actual kfree */
246 if (actions & DLM_UNLOCK_FREE_LOCK) {
247 /* this should always be coupled with list removal */
248 BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK));
249 mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
250 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
251 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
252 kref_read(&lock->lock_refs)-1);
255 if (actions & DLM_UNLOCK_CALL_AST)
258 /* if cancel or unlock succeeded, lvb work is done */
259 if (status == DLM_NORMAL)
260 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
/*
 * dlm_commit_pending_unlock - complete an unlock by unlinking the lock
 *
 * Removes @lock from whichever queue it is on with list_del_init(); this is
 * the same list operation dlmunlock_common() performs for
 * DLM_UNLOCK_REMOVE_LOCK.  @res is not touched by the list operation.
 * NOTE(review): callers are expected to hold the locks protecting the
 * queues -- none are taken here; confirm at the call sites.
 */
265 void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
266 struct dlm_lock *lock)
268 /* leave DLM_LKSB_PUT_LVB on the lksb so any final
269 * update of the lvb will be sent to the new master */
270 list_del_init(&lock->list);
/*
 * dlm_commit_pending_cancel - complete a cancel by regranting the lock
 *
 * Moves @lock to the tail of res->granted and resets its convert_type to
 * LKM_IVMODE, i.e. the combined effect of DLM_UNLOCK_REMOVE_LOCK,
 * DLM_UNLOCK_REGRANT_LOCK and DLM_UNLOCK_CLEAR_CONVERT_TYPE in
 * dlmunlock_common().
 * NOTE(review): no locking is done here; presumably the caller holds the
 * relevant spinlocks -- confirm at the call sites.
 */
273 void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
274 struct dlm_lock *lock)
276 list_move_tail(&lock->list, &res->granted);
277 lock->ml.convert_type = LKM_IVMODE;
/*
 * dlmunlock_master - run dlmunlock_common() with its last argument set to 1
 * (the master-node variant).  All other arguments are passed through
 * unchanged and the status from dlmunlock_common() is returned as-is.
 */
281 static inline enum dlm_status dlmunlock_master(struct dlm_ctxt *dlm,
282 struct dlm_lock_resource *res,
283 struct dlm_lock *lock,
284 struct dlm_lockstatus *lksb,
288 return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 1);
/*
 * dlmunlock_remote - run dlmunlock_common() with its last argument set to 0
 * (the non-master variant).  All other arguments are passed through
 * unchanged and the status from dlmunlock_common() is returned as-is.
 */
291 static inline enum dlm_status dlmunlock_remote(struct dlm_ctxt *dlm,
292 struct dlm_lock_resource *res,
293 struct dlm_lock *lock,
294 struct dlm_lockstatus *lksb,
295 int flags, int *call_ast)
297 return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 0);
305 * returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
/*
 * dlm_send_remote_unlock_request - send an unlock/cancel message to the
 * node identified by "owner"
 *
 * Builds a zeroed struct dlm_unlock_lock holding this node's number, the
 * request flags converted with cpu_to_be32(), the lock cookie and the lock
 * resource name.  lock->ml.cookie is copied without conversion; elsewhere
 * in this file it is read through be64_to_cpu(), so it is already stored
 * big-endian.  vec[0] carries the message; when LKM_PUT_LVB is set, vec[1]
 * carries DLM_LVB_LEN bytes taken from lock->lksb->lvb.  The vector is sent
 * with o2net_send_message_vec() as DLM_UNLOCK_LOCK_MSG, and the remote
 * handler's result is received in "status".
 *
 * If "owner" turns out to be this node, the request is not sent; per the
 * log message the lockres became local through a migration and the
 * operation is to be re-evaluated.  On a send error the failure is logged
 * and then either handled as a dead/unreachable host (dlm_is_host_down(),
 * dlm_is_node_dead()) or translated with dlm_err_to_dlm_status().
 *
 * NOTE(review): the trailing parameters (beyond lksb) and the exact status
 * chosen in each branch should be confirmed against the full definition.
 */
307 static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
308 struct dlm_lock_resource *res,
309 struct dlm_lock *lock,
310 struct dlm_lockstatus *lksb,
314 struct dlm_unlock_lock unlock;
321 mlog(0, "%.*s\n", res->lockname.len, res->lockname.name);
323 if (owner == dlm->node_num) {
324 /* ended up trying to contact ourself. this means
325 * that the lockres had been remote but became local
326 * via a migration. just retry it, now as local */
327 mlog(0, "%s:%.*s: this node became the master due to a "
328 "migration, re-evaluate now\n", dlm->name,
329 res->lockname.len, res->lockname.name);
333 memset(&unlock, 0, sizeof(unlock));
334 unlock.node_idx = dlm->node_num;
335 unlock.flags = cpu_to_be32(flags);
336 unlock.cookie = lock->ml.cookie;
337 unlock.namelen = res->lockname.len;
338 memcpy(unlock.name, res->lockname.name, unlock.namelen);
340 vec[0].iov_len = sizeof(struct dlm_unlock_lock);
341 vec[0].iov_base = &unlock;
343 if (flags & LKM_PUT_LVB) {
344 /* extra data to send if we are updating lvb */
345 vec[1].iov_len = DLM_LVB_LEN;
346 vec[1].iov_base = lock->lksb->lvb;
350 tmpret = o2net_send_message_vec(DLM_UNLOCK_LOCK_MSG, dlm->key,
351 vec, veclen, owner, &status);
353 // successfully sent and received
354 if (status == DLM_FORWARD)
355 mlog(0, "master was in-progress. retry\n");
358 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
359 "node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner);
360 if (dlm_is_host_down(tmpret)) {
361 /* NOTE: this seems strange, but it is what we want.
362 * when the master goes down during a cancel or
363 * unlock, the recovery code completes the operation
364 * as if the master had not died, then passes the
365 * updated state to the recovery master. this thread
366 * just needs to finish out the operation and call
368 if (dlm_is_node_dead(dlm, owner))
373 /* something bad. this will BUG in ocfs2 */
374 ret = dlm_err_to_dlm_status(tmpret);
384 * taken: takes and drops res->spinlock
386 * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID,
387 * return value from dlmunlock_master
/*
 * dlm_unlock_lock_handler - process an incoming struct dlm_unlock_lock
 * message (the layout built by dlm_send_remote_unlock_request())
 *
 * msg:  network message; msg->buf is interpreted as struct dlm_unlock_lock.
 * data: the struct dlm_ctxt this handler works on.
 *
 * Request validation (each failure is logged with ML_ERROR):
 *  - LKM_GET_LVB must not be set;
 *  - LKM_PUT_LVB and LKM_CANCEL must not be set together;
 *  - unlock->namelen must not exceed DLM_LOCKID_NAME_MAX.
 *
 * The lock resource is looked up by name.  The status becomes DLM_FORWARD
 * if it does not exist or this node is not its owner, DLM_RECOVERING or
 * DLM_MIGRATING if it is in one of those states.  Otherwise the queues are
 * scanned (three passes; granted -> converting -> blocked per the existing
 * comment) for a lock whose ml.cookie and ml.node match the request;
 * DLM_IVLOCKID is used when none matches.  The cookie comparison is done on
 * the raw stored values, without byte swapping.
 *
 * For a matching lock, the lvb flags are dropped unless the lock type is
 * LKM_EXMODE; with LKM_PUT_LVB the lvb from the message is copied into the
 * lksb and DLM_LKSB_PUT_LVB is set before dlmunlock_master() runs and
 * cleared again afterwards.  Finally dlm_lockres_calc_usage() and
 * dlm_kick_thread() are called, and the lockres reference is dropped with
 * dlm_lockres_put().
 *
 * NOTE(review): lksb is expected to be the found lock's own status block
 * (compare lock->lksb in dlm_send_remote_unlock_request) -- confirm.
 */
389 int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
392 struct dlm_ctxt *dlm = data;
393 struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
394 struct dlm_lock_resource *res = NULL;
395 struct dlm_lock *lock = NULL, *iter;
396 enum dlm_status status = DLM_NORMAL;
398 struct dlm_lockstatus *lksb = NULL;
401 struct list_head *queue;
403 flags = be32_to_cpu(unlock->flags);
405 if (flags & LKM_GET_LVB) {
406 mlog(ML_ERROR, "bad args! GET_LVB specified on unlock!\n");
410 if ((flags & (LKM_PUT_LVB|LKM_CANCEL)) == (LKM_PUT_LVB|LKM_CANCEL)) {
411 mlog(ML_ERROR, "bad args! cannot modify lvb on a CANCEL "
416 if (unlock->namelen > DLM_LOCKID_NAME_MAX) {
417 mlog(ML_ERROR, "Invalid name length in unlock handler!\n");
424 mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
425 "Domain %s not fully joined!\n", dlm->name);
427 mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : "none");
429 res = dlm_lookup_lockres(dlm, unlock->name, unlock->namelen);
431 /* We assume here that a no lock resource simply means
432 * it was migrated away and destroyed before the other
433 * node could detect it. */
434 mlog(0, "returning DLM_FORWARD -- res no longer exists\n");
435 status = DLM_FORWARD;
440 spin_lock(&res->spinlock);
441 if (res->state & DLM_LOCK_RES_RECOVERING) {
442 spin_unlock(&res->spinlock);
443 mlog(0, "returning DLM_RECOVERING\n");
444 status = DLM_RECOVERING;
448 if (res->state & DLM_LOCK_RES_MIGRATING) {
449 spin_unlock(&res->spinlock);
450 mlog(0, "returning DLM_MIGRATING\n");
451 status = DLM_MIGRATING;
455 if (res->owner != dlm->node_num) {
456 spin_unlock(&res->spinlock);
457 mlog(0, "returning DLM_FORWARD -- not master\n");
458 status = DLM_FORWARD;
462 for (i=0; i<3; i++) {
463 list_for_each_entry(iter, queue, list) {
464 if (iter->ml.cookie == unlock->cookie &&
465 iter->ml.node == unlock->node_idx) {
473 /* scan granted -> converting -> blocked queues */
476 spin_unlock(&res->spinlock);
478 status = DLM_IVLOCKID;
482 /* lock was found on queue */
484 if (flags & (LKM_VALBLK|LKM_PUT_LVB) &&
485 lock->ml.type != LKM_EXMODE)
486 flags &= ~(LKM_VALBLK|LKM_PUT_LVB);
488 /* unlockast only called on originating node */
489 if (flags & LKM_PUT_LVB) {
490 lksb->flags |= DLM_LKSB_PUT_LVB;
491 memcpy(&lksb->lvb[0], &unlock->lvb[0], DLM_LVB_LEN);
494 /* if this is in-progress, propagate the DLM_FORWARD
495 * all the way back out */
496 status = dlmunlock_master(dlm, res, lock, lksb, flags, &ignore);
497 if (status == DLM_FORWARD)
498 mlog(0, "lockres is in progress\n");
500 if (flags & LKM_PUT_LVB)
501 lksb->flags &= ~DLM_LKSB_PUT_LVB;
503 dlm_lockres_calc_usage(dlm, res);
504 dlm_kick_thread(dlm, res);
508 mlog(ML_ERROR, "failed to find lock to unlock! "
510 dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)),
511 dlm_get_lock_cookie_seq(be64_to_cpu(unlock->cookie)));
517 dlm_lockres_put(res);
/*
 * dlm_get_cancel_actions - work out what a cancel (LKM_CANCEL) must do
 *
 * Writes a DLM_UNLOCK_* mask to *actions according to the queue the lock
 * is on:
 *   res->blocked:    CALL_AST | REMOVE_LOCK
 *   res->converting: CALL_AST | REMOVE_LOCK | REGRANT_LOCK |
 *                    CLEAR_CONVERT_TYPE
 *   res->granted:    CALL_AST only, with status DLM_CANCELGRANT (the
 *                    request had already been granted)
 *   on no queue:     an error is logged and status is DLM_IVLOCKID
 * NOTE(review): the status for the blocked/converting cases is presumably
 * DLM_NORMAL -- confirm.  Only list membership is inspected here; the
 * caller (dlmunlock_common) holds res->spinlock and lock->spinlock when
 * calling.
 */
525 static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
526 struct dlm_lock_resource *res,
527 struct dlm_lock *lock,
528 struct dlm_lockstatus *lksb,
531 enum dlm_status status;
533 if (dlm_lock_on_list(&res->blocked, lock)) {
534 /* cancel this outright */
536 *actions = (DLM_UNLOCK_CALL_AST |
537 DLM_UNLOCK_REMOVE_LOCK);
538 } else if (dlm_lock_on_list(&res->converting, lock)) {
539 /* cancel the request, put back on granted */
541 *actions = (DLM_UNLOCK_CALL_AST |
542 DLM_UNLOCK_REMOVE_LOCK |
543 DLM_UNLOCK_REGRANT_LOCK |
544 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
545 } else if (dlm_lock_on_list(&res->granted, lock)) {
546 /* too late, already granted. */
547 status = DLM_CANCELGRANT;
548 *actions = DLM_UNLOCK_CALL_AST;
550 mlog(ML_ERROR, "lock to cancel is not on any list!\n");
551 status = DLM_IVLOCKID;
/*
 * dlm_get_unlock_actions - work out what a plain unlock (no LKM_CANCEL)
 * must do
 *
 * A lock that is on res->granted gets
 * DLM_UNLOCK_FREE_LOCK | DLM_UNLOCK_CALL_AST | DLM_UNLOCK_REMOVE_LOCK
 * written to *actions.  A lock that is not on res->granted takes the other
 * branch; NOTE(review): presumably that branch reports an error status and
 * requests no actions -- confirm.
 */
557 static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
558 struct dlm_lock_resource *res,
559 struct dlm_lock *lock,
560 struct dlm_lockstatus *lksb,
563 enum dlm_status status;
566 if (!dlm_lock_on_list(&res->granted, lock)) {
571 /* unlock granted lock */
573 *actions = (DLM_UNLOCK_FREE_LOCK |
574 DLM_UNLOCK_CALL_AST |
575 DLM_UNLOCK_REMOVE_LOCK);
580 /* there seems to be no point in doing this async
581 * since (even for the remote case) there is really
582 * no work to queue up... so just do it and fire the
583 * unlockast by hand when done... */
/*
 * dlmunlock - release a lock, or cancel a pending request with LKM_CANCEL
 *
 * dlm:       lock domain.
 * lksb:      status block of the lock; DLM_BADPARAM is reported when
 *            lksb->lockid or lksb->lockid->lockres is not set.
 * flags:     only LKM_CANCEL, LKM_VALBLK and LKM_INVVALBLK are accepted
 *            (anything else is reported as DLM_BADPARAM).  LKM_VALBLK is
 *            dropped when combined with LKM_CANCEL, and also when the lock
 *            type is not LKM_EXMODE.
 * unlockast: callback, invoked as (*unlockast)(data, status).
 * data:      opaque argument handed to unlockast.
 *
 * A reference on the lock resource is held for the duration of the call.
 * Ownership is sampled under res->spinlock into is_master, and the request
 * goes through dlmunlock_master() or dlmunlock_remote() (presumably chosen
 * by is_master -- TODO confirm).  DLM_RECOVERING, DLM_MIGRATING,
 * DLM_FORWARD and DLM_NOLOCKMGR lead to a retry.  Before the unlockast is
 * called, the dlm thread is kicked and the caller waits on dlm->ast_wq
 * until dlm_lock_basts_flushed() holds.  On DLM_NORMAL the dlm thread is
 * kicked for this resource; finally dlm_lockres_calc_usage() runs and the
 * resource reference is dropped.
 *
 * Exported with EXPORT_SYMBOL_GPL.
 */
584 enum dlm_status dlmunlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb,
585 int flags, dlm_astunlockfunc_t *unlockast, void *data)
587 enum dlm_status status;
588 struct dlm_lock_resource *res;
589 struct dlm_lock *lock = NULL;
590 int call_ast, is_master;
593 dlm_error(DLM_BADARGS);
597 if (flags & ~(LKM_CANCEL | LKM_VALBLK | LKM_INVVALBLK)) {
598 dlm_error(DLM_BADPARAM);
602 if ((flags & (LKM_VALBLK | LKM_CANCEL)) == (LKM_VALBLK | LKM_CANCEL)) {
603 mlog(0, "VALBLK given with CANCEL: ignoring VALBLK\n");
604 flags &= ~LKM_VALBLK;
607 if (!lksb->lockid || !lksb->lockid->lockres) {
608 dlm_error(DLM_BADPARAM);
618 dlm_lockres_get(res);
621 /* need to retry up here because owner may have changed */
622 mlog(0, "lock=%p res=%p\n", lock, res);
624 spin_lock(&res->spinlock);
625 is_master = (res->owner == dlm->node_num);
626 if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE)
627 flags &= ~LKM_VALBLK;
628 spin_unlock(&res->spinlock);
631 status = dlmunlock_master(dlm, res, lock, lksb, flags,
633 mlog(0, "done calling dlmunlock_master: returned %d, "
634 "call_ast is %d\n", status, call_ast);
636 status = dlmunlock_remote(dlm, res, lock, lksb, flags,
638 mlog(0, "done calling dlmunlock_remote: returned %d, "
639 "call_ast is %d\n", status, call_ast);
642 if (status == DLM_RECOVERING ||
643 status == DLM_MIGRATING ||
644 status == DLM_FORWARD ||
645 status == DLM_NOLOCKMGR) {
647 /* We want to go away for a tiny bit to allow recovery
648 * / migration to complete on this resource. I don't
649 * know of any wait queue we could sleep on as this
650 * may be happening on another node. Perhaps the
651 * proper solution is to queue up requests on the
654 /* do we want to yield(); ?? */
657 mlog(0, "retrying unlock due to pending recovery/"
658 "migration/in-progress/reconnect\n");
663 mlog(0, "calling unlockast(%p, %d)\n", data, status);
665 /* it is possible that there is one last bast
666 * pending. make sure it is flushed, then
667 * call the unlockast.
668 * not an issue if this is mastered remotely,
669 * since this lock has been removed from the
670 * lockres queues and cannot be found. */
671 dlm_kick_thread(dlm, NULL);
672 wait_event(dlm->ast_wq,
673 dlm_lock_basts_flushed(dlm, lock));
675 (*unlockast)(data, status);
678 if (status == DLM_CANCELGRANT)
681 if (status == DLM_NORMAL) {
682 mlog(0, "kicking the thread\n");
683 dlm_kick_thread(dlm, res);
687 dlm_lockres_calc_usage(dlm, res);
688 dlm_lockres_put(res);
691 mlog(0, "returning status=%d!\n", status);
694 EXPORT_SYMBOL_GPL(dlmunlock);