1 /******************************************************************************
2 *******************************************************************************
4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5 ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
7 ** This copyrighted material is made available to anyone wishing to use,
8 ** modify, copy, or redistribute it subject to the terms and conditions
9 ** of the GNU General Public License v.2.
11 *******************************************************************************
12 ******************************************************************************/
14 #include "dlm_internal.h"
15 #include "lockspace.h"
24 #include "requestqueue.h"
/* Module-wide state.  ls_lock is taken around lockspace creation and
   release; lslist holds the lockspaces (linked through ls_list) and is
   walked under lslist_lock; scand_task is the task passed to kthread_stop()
   when the scanner thread is stopped. */
29 static struct mutex ls_lock;
30 static struct list_head lslist;
31 static spinlock_t lslist_lock;
32 static struct task_struct * scand_task;
/* sysfs store handler wired to the "control" attribute.  The written text is
   parsed as an integer with kstrtoint() (base 0, i.e. auto-detected), the
   lockspace is looked up again through dlm_find_lockspace_local() using its
   own ls_local_handle, and that lookup is later paired with
   dlm_put_lockspace(). */
35 static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
39 int rc = kstrtoint(buf, 0, &n);
43 ls = dlm_find_lockspace_local(ls->ls_local_handle);
57 dlm_put_lockspace(ls);
/* sysfs store handler wired to the "event_done" attribute.  The written
   integer is parsed straight into ls->ls_uevent_result, then
   LSFL_UEVENT_WAIT is set and ls_uevent_wait is woken; do_uevent() sleeps on
   that wait queue until it sees the flag. */
61 static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
63 int rc = kstrtoint(buf, 0, &ls->ls_uevent_result);
67 set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
68 wake_up(&ls->ls_uevent_wait);
/* sysfs show handler: formats ls->ls_global_id as an unsigned decimal
   followed by a newline into buf (bounded by PAGE_SIZE) and returns
   snprintf()'s result. */
72 static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
74 return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
/* sysfs store handler for the "id" attribute: parses the written text as an
   unsigned integer (base auto-detected) directly into ls->ls_global_id. */
77 static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
79 int rc = kstrtouint(buf, 0, &ls->ls_global_id);
/* sysfs show handler for the "nodir" attribute: prints the value of
   dlm_no_directory(ls) as an unsigned decimal followed by a newline. */
86 static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
88 return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
/* sysfs store handler for the "nodir" attribute: parses the written integer
   into val and sets LSFL_NODIR in ls->ls_flags.
   NOTE(review): the condition on val that guards set_bit() is not visible in
   this excerpt; only setting (never clearing) the flag is seen here. */
91 static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
94 int rc = kstrtoint(buf, 0, &val);
99 set_bit(LSFL_NODIR, &ls->ls_flags);
/* sysfs show handler for "recover_status": prints the value returned by
   dlm_recover_status(ls) in lowercase hexadecimal followed by a newline. */
103 static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
105 uint32_t status = dlm_recover_status(ls);
106 return snprintf(buf, PAGE_SIZE, "%x\n", status);
/* sysfs show handler for "recover_nodeid": prints ls->ls_recover_nodeid as a
   signed decimal followed by a newline. */
109 static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
111 return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
/* Members of struct dlm_attr: a generic sysfs attribute plus optional
   per-lockspace show/store callbacks.  dlm_attr_show()/dlm_attr_store()
   recover this wrapper from the embedded attr with container_of() and
   dispatch to these callbacks when they are non-NULL. */
115 struct attribute attr;
116 ssize_t (*show)(struct dlm_ls *, char *);
117 ssize_t (*store)(struct dlm_ls *, const char *, size_t);
/* "control" attribute: owner-writable only, handled by dlm_control_store(). */
120 static struct dlm_attr dlm_attr_control = {
121 .attr = {.name = "control", .mode = S_IWUSR},
122 .store = dlm_control_store
/* "event_done" attribute: owner-writable only, handled by
   dlm_event_store(), which wakes the waiter in do_uevent(). */
125 static struct dlm_attr dlm_attr_event = {
126 .attr = {.name = "event_done", .mode = S_IWUSR},
127 .store = dlm_event_store
/* "id" attribute: mode is world-readable and owner-writable; writes go to
   dlm_id_store(). */
130 static struct dlm_attr dlm_attr_id = {
131 .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
133 .store = dlm_id_store
/* "nodir" attribute: world-readable and owner-writable, backed by
   dlm_nodir_show() and dlm_nodir_store(). */
136 static struct dlm_attr dlm_attr_nodir = {
137 .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
138 .show = dlm_nodir_show,
139 .store = dlm_nodir_store
/* "recover_status" attribute: read-only, backed by
   dlm_recover_status_show(). */
142 static struct dlm_attr dlm_attr_recover_status = {
143 .attr = {.name = "recover_status", .mode = S_IRUGO},
144 .show = dlm_recover_status_show
/* "recover_nodeid" attribute: read-only, backed by
   dlm_recover_nodeid_show(). */
147 static struct dlm_attr dlm_attr_recover_nodeid = {
148 .attr = {.name = "recover_nodeid", .mode = S_IRUGO},
149 .show = dlm_recover_nodeid_show
/* Table of the embedded struct attribute pointers; installed as the
   default_attrs of dlm_ktype so each lockspace kobject gets these files. */
152 static struct attribute *dlm_attrs[] = {
153 &dlm_attr_control.attr,
154 &dlm_attr_event.attr,
156 &dlm_attr_nodir.attr,
157 &dlm_attr_recover_status.attr,
158 &dlm_attr_recover_nodeid.attr,
/* Generic sysfs show dispatcher: recovers the lockspace from its embedded
   kobject and the dlm_attr from its embedded attribute, then calls the
   attribute's show callback.  Returns 0 when the attribute has no show
   callback. */
162 static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
165 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
166 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
167 return a->show ? a->show(ls, buf) : 0;
/* Generic sysfs store dispatcher: recovers the lockspace and dlm_attr with
   container_of() and calls the attribute's store callback.  When the
   attribute has no store callback the write is reported as fully consumed
   (len is returned). */
170 static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
171 const char *buf, size_t len)
173 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
174 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
175 return a->store ? a->store(ls, buf, len) : len;
/* kobject release callback installed in dlm_ktype: recovers the enclosing
   struct dlm_ls from the kobject.
   NOTE(review): comments in new_lockspace()/release_lockspace() say the ls is
   freed once the kobject is done with, so this is presumably where the
   structure is freed; the freeing statement is not visible here. */
178 static void lockspace_kobj_release(struct kobject *k)
180 struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
/* sysfs_ops for lockspace kobjects: all reads and writes funnel through the
   generic dispatchers dlm_attr_show() and dlm_attr_store(). */
184 static const struct sysfs_ops dlm_attr_ops = {
185 .show = dlm_attr_show,
186 .store = dlm_attr_store,
/* kobj_type used for every lockspace kobject (see kobject_init_and_add() in
   new_lockspace()): default attribute files, sysfs dispatch ops, and the
   release callback. */
189 static struct kobj_type dlm_ktype = {
190 .default_attrs = dlm_attrs,
191 .sysfs_ops = &dlm_attr_ops,
192 .release = lockspace_kobj_release,
/* The "dlm" kset: created under kernel_kobj in dlm_lockspace_init(),
   unregistered in dlm_lockspace_exit(), and assigned as the kset of each
   lockspace kobject in new_lockspace(). */
195 static struct kset *dlm_kset;
/* Emit a KOBJ_ONLINE or KOBJ_OFFLINE uevent for the lockspace kobject and
   sleep (interruptibly) until LSFL_UEVENT_WAIT is set, which
   dlm_event_store() does when "event_done" is written.  The flag is cleared
   by the wait condition itself (test_and_clear_bit).

   @in: selects the "joining"/"join" wording in the log messages when
        non-zero and "leaving"/"leave" when zero.
        NOTE(review): it presumably also selects ONLINE vs OFFLINE; the
        branch is not visible here.

   The value written by userspace, ls->ls_uevent_result, is copied into
   error; a failure is logged together with that result. */
197 static int do_uevent(struct dlm_ls *ls, int in)
202 kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
204 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
206 log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving");
208 /* dlm_controld will see the uevent, do the necessary group management
209 and then write to sysfs to wake us */
211 error = wait_event_interruptible(ls->ls_uevent_wait,
212 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
214 log_rinfo(ls, "group event done %d %d", error, ls->ls_uevent_result);
219 error = ls->ls_uevent_result;
222 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
223 error, ls->ls_uevent_result);
/* kset uevent callback: adds a LOCKSPACE=<ls_name> variable to the uevent
   environment of the lockspace that owns kobj. */
227 static int dlm_uevent(struct kset *kset, struct kobject *kobj,
228 struct kobj_uevent_env *env)
230 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
232 add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
/* uevent ops passed to kset_create_and_add() for the "dlm" kset. */
236 static struct kset_uevent_ops dlm_uevent_ops = {
237 .uevent = dlm_uevent,
/* Module-init time setup for this file: initialises ls_lock, the lslist
   head and lslist_lock, then creates the "dlm" kset under kernel_kobj with
   dlm_uevent_ops.  A warning is printed when the kset cannot be created. */
240 int __init dlm_lockspace_init(void)
243 mutex_init(&ls_lock);
244 INIT_LIST_HEAD(&lslist);
245 spin_lock_init(&lslist_lock);
247 dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
249 printk(KERN_WARNING "%s: can not create kset\n", __func__);
/* Undo dlm_lockspace_init(): unregister the "dlm" kset. */
255 void dlm_lockspace_exit(void)
257 kset_unregister(dlm_kset);
/* Walk lslist under lslist_lock looking for a lockspace whose next scan is
   due, i.e. jiffies has reached ls_scan_time + ci_scan_secs seconds.  The
   lock is dropped both when a due lockspace is found and after the walk
   completes without a match. */
260 static struct dlm_ls *find_ls_to_scan(void)
264 spin_lock(&lslist_lock);
265 list_for_each_entry(ls, &lslist, ls_list) {
266 if (time_after_eq(jiffies, ls->ls_scan_time +
267 dlm_config.ci_scan_secs * HZ)) {
268 spin_unlock(&lslist_lock);
272 spin_unlock(&lslist_lock);
/* Body of the "dlm_scand" kthread.  Until kthread_should_stop() it picks a
   due lockspace with find_ls_to_scan(); when dlm_lock_recovery_try()
   succeeds it stamps ls_scan_time with the current jiffies, runs
   dlm_scan_timeout() and dlm_scan_waiters(), and releases the recovery lock.
   ls_scan_time is also seen being pushed forward by HZ.
   NOTE(review): that is presumably the branch where the recovery lock could
   not be taken; the else is not visible here.
   The thread sleeps interruptibly for ci_scan_secs seconds via
   schedule_timeout_interruptible(). */
276 static int dlm_scand(void *data)
280 while (!kthread_should_stop()) {
281 ls = find_ls_to_scan();
283 if (dlm_lock_recovery_try(ls)) {
284 ls->ls_scan_time = jiffies;
286 dlm_scan_timeout(ls);
287 dlm_scan_waiters(ls);
288 dlm_unlock_recovery(ls);
290 ls->ls_scan_time += HZ;
294 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
/* Start the scanner: creates and wakes a kthread named "dlm_scand" running
   dlm_scand() with no argument. */
299 static int dlm_scand_start(void)
301 struct task_struct *p;
304 p = kthread_run(dlm_scand, NULL, "dlm_scand");
/* Stop the scanner thread recorded in scand_task; kthread_stop() waits for
   the thread function to return. */
312 static void dlm_scand_stop(void)
314 kthread_stop(scand_task);
/* Search lslist, under lslist_lock, for the lockspace whose ls_global_id
   equals @id.
   NOTE(review): what happens on a match is not visible here; callers pair
   the lookup with dlm_put_lockspace(). */
317 struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
321 spin_lock(&lslist_lock);
323 list_for_each_entry(ls, &lslist, ls_list) {
324 if (ls->ls_global_id == id) {
331 spin_unlock(&lslist_lock);
/* Search lslist, under lslist_lock, for the lockspace whose ls_local_handle
   equals @lockspace.  new_lockspace() sets ls_local_handle to the ls pointer
   itself, so this validates that the handle is still on the list.
   NOTE(review): what happens on a match is not visible here. */
335 struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
339 spin_lock(&lslist_lock);
340 list_for_each_entry(ls, &lslist, ls_list) {
341 if (ls->ls_local_handle == lockspace) {
348 spin_unlock(&lslist_lock);
/* Search lslist, under lslist_lock, for the lockspace whose
   ls_device.minor equals @minor.
   NOTE(review): what happens on a match is not visible here. */
352 struct dlm_ls *dlm_find_lockspace_device(int minor)
356 spin_lock(&lslist_lock);
357 list_for_each_entry(ls, &lslist, ls_list) {
358 if (ls->ls_device.minor == minor) {
365 spin_unlock(&lslist_lock);
/* Counterpart called after the dlm_find_lockspace_*() lookups; does its work
   under lslist_lock.
   NOTE(review): the statement between lock and unlock is not visible here;
   presumably it drops ls_count, which remove_lockspace() checks for zero.
   Confirm. */
369 void dlm_put_lockspace(struct dlm_ls *ls)
371 spin_lock(&lslist_lock);
373 spin_unlock(&lslist_lock);
/* Take ls off lslist once ls_count has reached zero.  The check and the
   list_del() happen under lslist_lock; ls_create_count is expected to be
   zero already (WARN_ON otherwise).  The lock is also released on the path
   where ls_count is still non-zero.
   NOTE(review): how that case is retried is not visible here. */
376 static void remove_lockspace(struct dlm_ls *ls)
379 spin_lock(&lslist_lock);
380 if (ls->ls_count == 0) {
381 WARN_ON(ls->ls_create_count != 0);
382 list_del(&ls->ls_list);
383 spin_unlock(&lslist_lock);
386 spin_unlock(&lslist_lock);
/* Start the threads shared by all lockspaces: the dlm_scand scanner and the
   lowcomms messaging layer.  A failure of either start call is logged with
   its error code. */
391 static int threads_start(void)
395 error = dlm_scand_start();
397 log_print("cannot start dlm_scand thread %d", error);
401 /* Thread for sending/receiving messages for all lockspaces */
402 error = dlm_lowcomms_start();
404 log_print("cannot start dlm lowcomms %d", error);
/* NOTE(review): presumably stops what threads_start() started (dlm_scand and
   lowcomms); the body is not visible in this excerpt, so confirm. */
416 static void threads_stop(void)
/* Create the lockspace called @name, or account for another user of an
   existing lockspace with the same name.

   @name:       lockspace name; its strlen() is compared against
                DLM_LOCKSPACE_LEN.
   @cluster:    optional cluster name; when recovery callbacks are enabled it
                is compared (strncmp over DLM_LOCKSPACE_LEN bytes) against
                dlm_config.ci_cluster_name and a mismatch is logged.
   @flags:      DLM_LSFL_* bits.  NEWEXCL is tested when an existing
                lockspace is found, TIMEWARN sets LSFL_TIMEWARN, FS causes
                dlm_callback_start() to be called, and TIMEWARN/FS are masked
                out of ls_exflags.
   @lvblen:     lock value block length; zero or a non-multiple of 8 is
                checked for up front.
   @ops, @ops_arg, @ops_result:
                recovery callbacks.  They are only recorded when
                dlm_config.ci_recover_callbacks is set; otherwise
                *ops_result is set to -EOPNOTSUPP.
   @lockspace:  NOTE(review): presumably receives the new handle; the
                assignment is not visible in this excerpt.

   Visible sequence: validate arguments, take a module reference, require the
   user daemon, look for an existing lockspace under lslist_lock (bumping
   ls_create_count), otherwise allocate and initialise a new struct dlm_ls,
   add it to lslist, start the callback and recoverd threads, wait for
   LSFL_RECOVER_LOCK, register the kobject in dlm_kset, announce it with
   uevents, and wait for membership to be configured (ls_members_done).  The
   tail of the function is the unwind sequence, which releases those
   resources in reverse order. */
422 static int new_lockspace(const char *name, const char *cluster,
423 uint32_t flags, int lvblen,
424 const struct dlm_lockspace_ops *ops, void *ops_arg,
425 int *ops_result, dlm_lockspace_t **lockspace)
430 int namelen = strlen(name);
432 if (namelen > DLM_LOCKSPACE_LEN)
435 if (!lvblen || (lvblen % 8))
438 if (!try_module_get(THIS_MODULE))
441 if (!dlm_user_daemon_available()) {
442 log_print("dlm user daemon not available");
447 if (ops && ops_result) {
448 if (!dlm_config.ci_recover_callbacks)
449 *ops_result = -EOPNOTSUPP;
454 if (dlm_config.ci_recover_callbacks && cluster &&
455 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
456 log_print("dlm cluster name %s mismatch %s",
457 dlm_config.ci_cluster_name, cluster);
/* Look for an already-created lockspace with the same name (same length
   and same bytes). */
464 spin_lock(&lslist_lock);
465 list_for_each_entry(ls, &lslist, ls_list) {
466 WARN_ON(ls->ls_create_count <= 0);
467 if (ls->ls_namelen != namelen)
469 if (memcmp(ls->ls_name, name, namelen))
471 if (flags & DLM_LSFL_NEWEXCL) {
475 ls->ls_create_count++;
480 spin_unlock(&lslist_lock);
/* The name is stored in the same allocation as the zeroed structure. */
487 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
490 memcpy(ls->ls_name, name, namelen);
491 ls->ls_namelen = namelen;
492 ls->ls_lvblen = lvblen;
495 ls->ls_scan_time = jiffies;
497 if (ops && dlm_config.ci_recover_callbacks) {
499 ls->ls_ops_arg = ops_arg;
502 if (flags & DLM_LSFL_TIMEWARN)
503 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
505 /* ls_exflags are forced to match among nodes, and we don't
506 need to require all nodes to have some flags set */
507 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
510 size = dlm_config.ci_rsbtbl_size;
511 ls->ls_rsbtbl_size = size;
/* vmalloc() does not zero memory, so each bucket's trees and lock are
   initialised explicitly below. */
513 ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
516 for (i = 0; i < size; i++) {
517 ls->ls_rsbtbl[i].keep.rb_node = NULL;
518 ls->ls_rsbtbl[i].toss.rb_node = NULL;
519 spin_lock_init(&ls->ls_rsbtbl[i].lock);
522 spin_lock_init(&ls->ls_remove_spin);
524 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
525 ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1,
527 if (!ls->ls_remove_names[i])
531 idr_init(&ls->ls_lkbidr);
532 spin_lock_init(&ls->ls_lkbidr_spin);
534 INIT_LIST_HEAD(&ls->ls_waiters);
535 mutex_init(&ls->ls_waiters_mutex);
536 INIT_LIST_HEAD(&ls->ls_orphans);
537 mutex_init(&ls->ls_orphans_mutex);
538 INIT_LIST_HEAD(&ls->ls_timeout);
539 mutex_init(&ls->ls_timeout_mutex);
541 INIT_LIST_HEAD(&ls->ls_new_rsb);
542 spin_lock_init(&ls->ls_new_rsb_spin);
544 INIT_LIST_HEAD(&ls->ls_nodes);
545 INIT_LIST_HEAD(&ls->ls_nodes_gone);
546 ls->ls_num_nodes = 0;
547 ls->ls_low_nodeid = 0;
548 ls->ls_total_weight = 0;
549 ls->ls_node_array = NULL;
551 memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
552 ls->ls_stub_rsb.res_ls = ls;
554 ls->ls_debug_rsb_dentry = NULL;
555 ls->ls_debug_waiters_dentry = NULL;
557 init_waitqueue_head(&ls->ls_uevent_wait);
558 ls->ls_uevent_result = 0;
559 init_completion(&ls->ls_members_done);
560 ls->ls_members_result = -1;
562 mutex_init(&ls->ls_cb_mutex);
563 INIT_LIST_HEAD(&ls->ls_cb_delay);
565 ls->ls_recoverd_task = NULL;
566 mutex_init(&ls->ls_recoverd_active);
567 spin_lock_init(&ls->ls_recover_lock);
568 spin_lock_init(&ls->ls_rcom_spin);
569 get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
570 ls->ls_recover_status = 0;
571 ls->ls_recover_seq = 0;
572 ls->ls_recover_args = NULL;
573 init_rwsem(&ls->ls_in_recovery);
574 init_rwsem(&ls->ls_recv_active);
575 INIT_LIST_HEAD(&ls->ls_requestqueue);
576 mutex_init(&ls->ls_requestqueue_mutex);
577 mutex_init(&ls->ls_clear_proc_locks);
579 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
580 if (!ls->ls_recover_buf)
584 ls->ls_num_slots = 0;
585 ls->ls_slots_size = 0;
588 INIT_LIST_HEAD(&ls->ls_recover_list);
589 spin_lock_init(&ls->ls_recover_list_lock);
590 idr_init(&ls->ls_recover_idr);
591 spin_lock_init(&ls->ls_recover_idr_lock);
592 ls->ls_recover_list_count = 0;
593 ls->ls_local_handle = ls;
594 init_waitqueue_head(&ls->ls_wait_general);
595 INIT_LIST_HEAD(&ls->ls_root_list);
596 init_rwsem(&ls->ls_root_sem);
/* Publish the lockspace: from here on the dlm_find_lockspace_*() lookups
   and find_ls_to_scan() can see it. */
598 spin_lock(&lslist_lock);
599 ls->ls_create_count = 1;
600 list_add(&ls->ls_list, &lslist);
601 spin_unlock(&lslist_lock);
603 if (flags & DLM_LSFL_FS) {
604 error = dlm_callback_start(ls);
606 log_error(ls, "can't start dlm_callback %d", error);
611 init_waitqueue_head(&ls->ls_recover_lock_wait);
614 * Once started, dlm_recoverd first looks for ls in lslist, then
615 * initializes ls_in_recovery as locked in "down" mode. We need
616 * to wait for the wakeup from dlm_recoverd because in_recovery
617 * has to start out in down mode.
620 error = dlm_recoverd_start(ls);
622 log_error(ls, "can't start dlm_recoverd %d", error);
626 wait_event(ls->ls_recover_lock_wait,
627 test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
629 /* let kobject handle freeing of ls if there's an error */
632 ls->ls_kobj.kset = dlm_kset;
633 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
637 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
639 /* This uevent triggers dlm_controld in userspace to add us to the
640 group of nodes that are members of this lockspace (managed by the
641 cluster infrastructure.) Once it's done that, it tells us who the
642 current lockspace members are (via configfs) and then tells the
643 lockspace to start running (via sysfs) in dlm_ls_start(). */
645 error = do_uevent(ls, 1);
649 wait_for_completion(&ls->ls_members_done);
650 error = ls->ls_members_result;
654 dlm_create_debug_file(ls);
656 log_rinfo(ls, "join complete");
/* Unwind sequence: the calls below undo the setup above in reverse order. */
662 dlm_clear_members(ls);
663 kfree(ls->ls_node_array);
665 dlm_recoverd_stop(ls);
667 dlm_callback_stop(ls);
669 spin_lock(&lslist_lock);
670 list_del(&ls->ls_list);
671 spin_unlock(&lslist_lock);
672 idr_destroy(&ls->ls_recover_idr);
673 kfree(ls->ls_recover_buf);
675 idr_destroy(&ls->ls_lkbidr);
677 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
/* NOTE(review): kfree(NULL) is a no-op, so this NULL guard is redundant;
   release_lockspace() frees the same array without it. */
678 if (ls->ls_remove_names[i])
679 kfree(ls->ls_remove_names[i]);
681 vfree(ls->ls_rsbtbl);
684 kobject_put(&ls->ls_kobj);
688 module_put(THIS_MODULE);
/* Public entry point for creating or joining a lockspace.  Under ls_lock it
   calls threads_start() and then new_lockspace() with the caller's arguments
   passed through unchanged; see new_lockspace() for what the parameters
   mean.
   NOTE(review): the conditions around threads_start() and the handling of
   new_lockspace()'s result are not visible in this excerpt. */
692 int dlm_new_lockspace(const char *name, const char *cluster,
693 uint32_t flags, int lvblen,
694 const struct dlm_lockspace_ops *ops, void *ops_arg,
695 int *ops_result, dlm_lockspace_t **lockspace)
699 mutex_lock(&ls_lock);
701 error = threads_start();
705 error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
706 ops_result, lockspace);
714 mutex_unlock(&ls_lock);
/* idr_for_each() callback: returns non-zero for an lkb whose lkb_nodeid is 0
   and whose granted mode is not DLM_LOCK_IV.  A non-zero return stops the
   idr walk, so lockspace_busy() sees it as "busy". */
718 static int lkb_idr_is_local(int id, void *p, void *data)
720 struct dlm_lkb *lkb = p;
722 return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
/* idr_for_each() callback used by lockspace_busy() when force is 0.
   NOTE(review): the body is not visible here; presumably it reports every
   lkb as present.  Confirm. */
725 static int lkb_idr_is_any(int id, void *p, void *data)
/* idr_for_each() callback used by release_lockspace() to dispose of each
   lkb.  The lock value block is freed only when one is attached and the lkb
   carries DLM_IFL_MSTCPY.
   NOTE(review): freeing of the lkb itself is not visible in this excerpt. */
730 static int lkb_idr_free(int id, void *p, void *data)
732 struct dlm_lkb *lkb = p;
734 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
735 dlm_free_lvb(lkb->lkb_lvbptr);
741 /* NOTE: We check the lkbidr here rather than the resource table.
742 This is because there may be LKBs queued as ASTs that have been unlinked
743 from their RSBs and are pending deletion once the AST has been delivered */
/* Decide, under ls_lkbidr_spin, whether the lockspace still has lkbs that
   should block its release.  One branch walks the lkb idr with
   lkb_idr_is_any; force == 1 walks it with lkb_idr_is_local, so only local
   lkbs count.
   NOTE(review): the first branch's condition is not visible; per the force
   table on dlm_release_lockspace() it is presumably force == 0.  The result
   for higher force values is not visible here either. */
745 static int lockspace_busy(struct dlm_ls *ls, int force)
749 spin_lock(&ls->ls_lkbidr_spin);
751 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
752 } else if (force == 1) {
753 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
757 spin_unlock(&ls->ls_lkbidr_spin);
/* Drop one creator's use of ls and, when it was the last one, tear the
   lockspace down.

   Under lslist_lock: when ls_create_count is 1 it is set to 0 on the path
   that goes on to remove the lockspace; when it is greater than 1 it is
   decremented and the new value becomes rv.  The "no remove" case is logged
   with rv.
   NOTE(review): how the busy result from lockspace_busy() feeds into that
   decision is not visible in this excerpt.

   The teardown visible below: deregister the device, stop dlm_recoverd and
   the callback machinery, take ls off lslist, delete the debug file, free
   the recovery idr and buffer, free every lkb and the lkb idr, free the rsbs
   in each bucket's keep and toss trees and the table itself, free the
   remove_names buffers, drain ls_new_rsb, purge the request queue, free
   recovery args and membership data, then drop the kobject reference and
   the module reference.

   @force: see the table above dlm_release_lockspace(); force < 3 with the
           user daemon available guards a step that is not visible here. */
761 static int release_lockspace(struct dlm_ls *ls, int force)
767 busy = lockspace_busy(ls, force);
769 spin_lock(&lslist_lock);
770 if (ls->ls_create_count == 1) {
774 /* remove_lockspace takes ls off lslist */
775 ls->ls_create_count = 0;
778 } else if (ls->ls_create_count > 1) {
779 rv = --ls->ls_create_count;
783 spin_unlock(&lslist_lock);
786 log_debug(ls, "release_lockspace no remove %d", rv);
790 dlm_device_deregister(ls);
792 if (force < 3 && dlm_user_daemon_available())
795 dlm_recoverd_stop(ls);
797 dlm_callback_stop(ls);
799 remove_lockspace(ls);
801 dlm_delete_debug_file(ls);
803 idr_destroy(&ls->ls_recover_idr);
804 kfree(ls->ls_recover_buf);
807 * Free all lkb's in idr
810 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
811 idr_destroy(&ls->ls_lkbidr);
814 * Free all rsb's on rsbtbl[] lists
817 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
818 while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
819 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
820 rb_erase(n, &ls->ls_rsbtbl[i].keep);
824 while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
825 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
826 rb_erase(n, &ls->ls_rsbtbl[i].toss);
831 vfree(ls->ls_rsbtbl);
833 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
834 kfree(ls->ls_remove_names[i]);
836 while (!list_empty(&ls->ls_new_rsb)) {
837 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
839 list_del(&rsb->res_hashchain);
844 * Free structures on any other lists
847 dlm_purge_requestqueue(ls);
848 kfree(ls->ls_recover_args);
849 dlm_clear_members(ls);
850 dlm_clear_members_gone(ls);
851 kfree(ls->ls_node_array);
852 log_rinfo(ls, "release_lockspace final free");
853 kobject_put(&ls->ls_kobj);
854 /* The ls structure will be freed when the kobject is done with */
856 module_put(THIS_MODULE);
861 * Called when a system has released all its locks and is not going to use the
862 * lockspace any longer. We free everything we're managing for this lockspace.
863 * Remaining nodes will go through the recovery process as if we'd died. The
864 * lockspace must continue to function as usual, participating in recoveries,
865 * until this returns.
867 * Force has 4 possible values:
868 * 0 - don't destroy lockspace if it has any LKBs
869 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
870 * 2 - destroy lockspace regardless of LKBs
871 * 3 - destroy lockspace as part of a forced shutdown
/* Public entry point for releasing a lockspace handle.  The handle is first
   validated with dlm_find_lockspace_local(), and that lookup is immediately
   balanced with dlm_put_lockspace(); the actual release then runs under
   ls_lock via release_lockspace(ls, force).
   NOTE(review): handling of an unknown handle and of release_lockspace()'s
   result is not visible in this excerpt. */
874 int dlm_release_lockspace(void *lockspace, int force)
879 ls = dlm_find_lockspace_local(lockspace);
882 dlm_put_lockspace(ls);
884 mutex_lock(&ls_lock);
885 error = release_lockspace(ls, force);
890 mutex_unlock(&ls_lock);
/* Walk lslist under lslist_lock and act on lockspaces according to their
   LSFL_RUNNING flag.  The lock is dropped before logging "no userland
   control daemon, stopping lockspace" for a lockspace, and a summary with a
   count of lockspaces is printed at the end.
   NOTE(review): the stop call itself, how the walk resumes after the lock is
   dropped, and what exactly is counted are not visible in this excerpt. */
895 void dlm_stop_lockspaces(void)
902 spin_lock(&lslist_lock);
903 list_for_each_entry(ls, &lslist, ls_list) {
904 if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) {
908 spin_unlock(&lslist_lock);
909 log_error(ls, "no userland control daemon, stopping lockspace");
913 spin_unlock(&lslist_lock);
916 log_print("dlm user daemon left %d lockspaces", count);