GNU Linux-libre 6.9-gnu
[releases.git] / fs / dlm / plock.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
4  */
5
6 #include <linux/fs.h>
7 #include <linux/filelock.h>
8 #include <linux/miscdevice.h>
9 #include <linux/poll.h>
10 #include <linux/dlm.h>
11 #include <linux/dlm_plock.h>
12 #include <linux/slab.h>
13
14 #include <trace/events/dlm.h>
15
16 #include "dlm_internal.h"
17 #include "lockspace.h"
18
/* Protects send_list and recv_list and the list linkage of every plock_op. */
static DEFINE_SPINLOCK(ops_lock);
/* Ops queued by send_op() that the device reader has not picked up yet. */
static LIST_HEAD(send_list);
/* Ops handed out by dev_read() that still await a result via dev_write(). */
static LIST_HEAD(recv_list);
/* Woken by send_op() when a new op lands on send_list; polled by dev_poll(). */
static DECLARE_WAIT_QUEUE_HEAD(send_wq);
/* Woken by dev_write() once a synchronous op has been marked done. */
static DECLARE_WAIT_QUEUE_HEAD(recv_wq);
24
/*
 * Extra state kept for a lock request that is completed through a callback
 * instead of a sleeping caller (see dlm_posix_lock()).
 */
struct plock_async_data {
        /* the caller's file_lock; handed back to @callback */
        void *fl;
        /* the file the lock applies to; used for local vfs bookkeeping */
        void *file;
        /* private copy of the request made with locks_copy_lock() */
        struct file_lock flc;
        /* fl->fl_lmops->lm_grant of the request, called with the result */
        int (*callback)(struct file_lock *fl, int result);
};
31
/* One plock request travelling between the kernel and the userspace daemon. */
struct plock_op {
        /* linkage on send_list or recv_list, protected by ops_lock */
        struct list_head list;
        /* set to 1 by dev_write() when the reply for a synchronous op arrived */
        int done;
        /* request sent to userspace; overwritten with the reply in dev_write() */
        struct dlm_plock_info info;
        /* if set indicates async handling */
        struct plock_async_data *data;
};
39
/*
 * Stamp the kernel's plock protocol version (major, minor, patch) into
 * @info.  send_op() calls this before an op is queued for userspace.
 */
static inline void set_version(struct dlm_plock_info *info)
{
        info->version[0] = DLM_PLOCK_VERSION_MAJOR;
        info->version[1] = DLM_PLOCK_VERSION_MINOR;
        info->version[2] = DLM_PLOCK_VERSION_PATCH;
}
46
47 static struct plock_op *plock_lookup_waiter(const struct dlm_plock_info *info)
48 {
49         struct plock_op *op = NULL, *iter;
50
51         list_for_each_entry(iter, &recv_list, list) {
52                 if (iter->info.fsid == info->fsid &&
53                     iter->info.number == info->number &&
54                     iter->info.owner == info->owner &&
55                     iter->info.pid == info->pid &&
56                     iter->info.start == info->start &&
57                     iter->info.end == info->end &&
58                     iter->info.ex == info->ex &&
59                     iter->info.wait) {
60                         op = iter;
61                         break;
62                 }
63         }
64
65         return op;
66 }
67
68 static int check_version(struct dlm_plock_info *info)
69 {
70         if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
71             (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
72                 log_print("plock device version mismatch: "
73                           "kernel (%u.%u.%u), user (%u.%u.%u)",
74                           DLM_PLOCK_VERSION_MAJOR,
75                           DLM_PLOCK_VERSION_MINOR,
76                           DLM_PLOCK_VERSION_PATCH,
77                           info->version[0],
78                           info->version[1],
79                           info->version[2]);
80                 return -EINVAL;
81         }
82         return 0;
83 }
84
85 static void dlm_release_plock_op(struct plock_op *op)
86 {
87         kfree(op->data);
88         kfree(op);
89 }
90
/*
 * Queue @op for the userspace daemon: stamp the protocol version, append the
 * op to send_list under ops_lock and wake up anyone waiting on send_wq.
 */
static void send_op(struct plock_op *op)
{
        /* must be filled in before the op becomes visible on send_list */
        set_version(&op->info);
        spin_lock(&ops_lock);
        list_add_tail(&op->list, &send_list);
        spin_unlock(&ops_lock);
        wake_up(&send_wq);
}
99
100 static int do_lock_cancel(const struct dlm_plock_info *orig_info)
101 {
102         struct plock_op *op;
103         int rv;
104
105         op = kzalloc(sizeof(*op), GFP_NOFS);
106         if (!op)
107                 return -ENOMEM;
108
109         op->info = *orig_info;
110         op->info.optype = DLM_PLOCK_OP_CANCEL;
111         op->info.wait = 0;
112
113         send_op(op);
114         wait_event(recv_wq, (op->done != 0));
115
116         rv = op->info.rv;
117
118         dlm_release_plock_op(op);
119         return rv;
120 }
121
/*
 * dlm_posix_lock - request a POSIX lock through the userspace plock daemon
 * @lockspace: lockspace handle, resolved with dlm_find_lockspace_local()
 * @number: identifier of the locked object, passed through in info.number
 * @file: file the lock applies to
 * @cmd: not used by this function
 * @fl: the lock request
 *
 * If @fl has an lm_grant callback the request is handled asynchronously:
 * the op is queued and FILE_LOCK_DEFERRED is returned; the result is
 * delivered later through the callback (see dlm_plock_callback()).
 *
 * Otherwise the caller sleeps until the reply arrives.  A blocking request
 * (FL_SLEEP) sleeps interruptibly; when interrupted, a cancel is sent and,
 * if that succeeds, -EINTR is returned.  If the cancel does not succeed the
 * function falls back to an uninterruptible wait for the original reply.
 *
 * Returns the rv from the reply, FILE_LOCK_DEFERRED, -EINTR, -EINVAL if the
 * lockspace is not found, or -ENOMEM.
 */
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
                   int cmd, struct file_lock *fl)
{
        struct plock_async_data *op_data;
        struct dlm_ls *ls;
        struct plock_op *op;
        int rv;

        ls = dlm_find_lockspace_local(lockspace);
        if (!ls)
                return -EINVAL;

        op = kzalloc(sizeof(*op), GFP_NOFS);
        if (!op) {
                rv = -ENOMEM;
                goto out;
        }

        op->info.optype         = DLM_PLOCK_OP_LOCK;
        op->info.pid            = fl->c.flc_pid;
        op->info.ex             = lock_is_write(fl);
        op->info.wait           = !!(fl->c.flc_flags & FL_SLEEP);
        op->info.fsid           = ls->ls_global_id;
        op->info.number         = number;
        op->info.start          = fl->fl_start;
        op->info.end            = fl->fl_end;
        op->info.owner = (__u64)(long) fl->c.flc_owner;
        /* async handling */
        if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
                op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
                if (!op_data) {
                        dlm_release_plock_op(op);
                        rv = -ENOMEM;
                        goto out;
                }

                op_data->callback = fl->fl_lmops->lm_grant;
                locks_init_lock(&op_data->flc);
                locks_copy_lock(&op_data->flc, fl);
                op_data->fl             = fl;
                op_data->file   = file;

                op->data = op_data;

                /* op is not touched again here after it has been queued */
                send_op(op);
                rv = FILE_LOCK_DEFERRED;
                goto out;
        }

        send_op(op);

        if (op->info.wait) {
                rv = wait_event_interruptible(recv_wq, (op->done != 0));
                if (rv == -ERESTARTSYS) {
                        spin_lock(&ops_lock);
                        /* recheck under ops_lock if we got a done != 0,
                         * if so this interrupt case should be ignored
                         */
                        if (op->done != 0) {
                                spin_unlock(&ops_lock);
                                goto do_lock_wait;
                        }
                        spin_unlock(&ops_lock);

                        rv = do_lock_cancel(&op->info);
                        switch (rv) {
                        case 0:
                                /* waiter was deleted in user space, answer will never come
                                 * remove original request. The original request must be
                                 * on recv_list because the answer of do_lock_cancel()
                                 * synchronized it.
                                 */
                                spin_lock(&ops_lock);
                                list_del(&op->list);
                                spin_unlock(&ops_lock);
                                rv = -EINTR;
                                break;
                        case -ENOENT:
                                /* cancellation wasn't successful but op should be done */
                                fallthrough;
                        default:
                                /* internal error doing cancel we need to wait */
                                goto wait;
                        }

                        log_debug(ls, "%s: wait interrupted %x %llx pid %d",
                                  __func__, ls->ls_global_id,
                                  (unsigned long long)number, op->info.pid);
                        dlm_release_plock_op(op);
                        goto out;
                }
        } else {
                /* also entered via "goto wait" from the failed-cancel path above */
wait:
                wait_event(recv_wq, (op->done != 0));
        }

do_lock_wait:

        /* dev_write() unlinks the op before marking it done */
        WARN_ON(!list_empty(&op->list));

        rv = op->info.rv;

        if (!rv) {
                /* lock granted: record it locally; a failure is only logged */
                if (locks_lock_file_wait(file, fl) < 0)
                        log_error(ls, "dlm_posix_lock: vfs lock error %llx",
                                  (unsigned long long)number);
        }

        dlm_release_plock_op(op);
out:
        dlm_put_lockspace(ls);
        return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_lock);
236
237 /* Returns failure iff a successful lock operation should be canceled */
238 static int dlm_plock_callback(struct plock_op *op)
239 {
240         struct plock_async_data *op_data = op->data;
241         struct file *file;
242         struct file_lock *fl;
243         struct file_lock *flc;
244         int (*notify)(struct file_lock *fl, int result) = NULL;
245         int rv = 0;
246
247         WARN_ON(!list_empty(&op->list));
248
249         /* check if the following 2 are still valid or make a copy */
250         file = op_data->file;
251         flc = &op_data->flc;
252         fl = op_data->fl;
253         notify = op_data->callback;
254
255         if (op->info.rv) {
256                 notify(fl, op->info.rv);
257                 goto out;
258         }
259
260         /* got fs lock; bookkeep locally as well: */
261         flc->c.flc_flags &= ~FL_SLEEP;
262         if (posix_lock_file(file, flc, NULL)) {
263                 /*
264                  * This can only happen in the case of kmalloc() failure.
265                  * The filesystem's own lock is the authoritative lock,
266                  * so a failure to get the lock locally is not a disaster.
267                  * As long as the fs cannot reliably cancel locks (especially
268                  * in a low-memory situation), we're better off ignoring
269                  * this failure than trying to recover.
270                  */
271                 log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
272                           (unsigned long long)op->info.number, file, fl);
273         }
274
275         rv = notify(fl, 0);
276         if (rv) {
277                 /* XXX: We need to cancel the fs lock here: */
278                 log_print("%s: lock granted after lock request failed; dangling lock!",
279                           __func__);
280                 goto out;
281         }
282
283 out:
284         dlm_release_plock_op(op);
285         return rv;
286 }
287
288 int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
289                      struct file_lock *fl)
290 {
291         struct dlm_ls *ls;
292         struct plock_op *op;
293         int rv;
294         unsigned char saved_flags = fl->c.flc_flags;
295
296         ls = dlm_find_lockspace_local(lockspace);
297         if (!ls)
298                 return -EINVAL;
299
300         op = kzalloc(sizeof(*op), GFP_NOFS);
301         if (!op) {
302                 rv = -ENOMEM;
303                 goto out;
304         }
305
306         /* cause the vfs unlock to return ENOENT if lock is not found */
307         fl->c.flc_flags |= FL_EXISTS;
308
309         rv = locks_lock_file_wait(file, fl);
310         if (rv == -ENOENT) {
311                 rv = 0;
312                 goto out_free;
313         }
314         if (rv < 0) {
315                 log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
316                           rv, (unsigned long long)number);
317         }
318
319         op->info.optype         = DLM_PLOCK_OP_UNLOCK;
320         op->info.pid            = fl->c.flc_pid;
321         op->info.fsid           = ls->ls_global_id;
322         op->info.number         = number;
323         op->info.start          = fl->fl_start;
324         op->info.end            = fl->fl_end;
325         op->info.owner = (__u64)(long) fl->c.flc_owner;
326
327         if (fl->c.flc_flags & FL_CLOSE) {
328                 op->info.flags |= DLM_PLOCK_FL_CLOSE;
329                 send_op(op);
330                 rv = 0;
331                 goto out;
332         }
333
334         send_op(op);
335         wait_event(recv_wq, (op->done != 0));
336
337         WARN_ON(!list_empty(&op->list));
338
339         rv = op->info.rv;
340
341         if (rv == -ENOENT)
342                 rv = 0;
343
344 out_free:
345         dlm_release_plock_op(op);
346 out:
347         dlm_put_lockspace(ls);
348         fl->c.flc_flags = saved_flags;
349         return rv;
350 }
351 EXPORT_SYMBOL_GPL(dlm_posix_unlock);
352
353 /*
354  * NOTE: This implementation can only handle async lock requests as nfs
355  * do it. It cannot handle cancellation of a pending lock request sitting
356  * in wait_event(), but for now only nfs is the only user local kernel
357  * user.
358  */
359 int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file,
360                      struct file_lock *fl)
361 {
362         struct dlm_plock_info info;
363         struct plock_op *op;
364         struct dlm_ls *ls;
365         int rv;
366
367         /* this only works for async request for now and nfs is the only
368          * kernel user right now.
369          */
370         if (WARN_ON_ONCE(!fl->fl_lmops || !fl->fl_lmops->lm_grant))
371                 return -EOPNOTSUPP;
372
373         ls = dlm_find_lockspace_local(lockspace);
374         if (!ls)
375                 return -EINVAL;
376
377         memset(&info, 0, sizeof(info));
378         info.pid = fl->c.flc_pid;
379         info.ex = lock_is_write(fl);
380         info.fsid = ls->ls_global_id;
381         dlm_put_lockspace(ls);
382         info.number = number;
383         info.start = fl->fl_start;
384         info.end = fl->fl_end;
385         info.owner = (__u64)(long) fl->c.flc_owner;
386
387         rv = do_lock_cancel(&info);
388         switch (rv) {
389         case 0:
390                 spin_lock(&ops_lock);
391                 /* lock request to cancel must be on recv_list because
392                  * do_lock_cancel() synchronizes it.
393                  */
394                 op = plock_lookup_waiter(&info);
395                 if (WARN_ON_ONCE(!op)) {
396                         spin_unlock(&ops_lock);
397                         rv = -ENOLCK;
398                         break;
399                 }
400
401                 list_del(&op->list);
402                 spin_unlock(&ops_lock);
403                 WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
404                 op->data->callback(op->data->fl, -EINTR);
405                 dlm_release_plock_op(op);
406                 rv = -EINTR;
407                 break;
408         case -ENOENT:
409                 /* if cancel wasn't successful we probably were to late
410                  * or it was a non-blocking lock request, so just unlock it.
411                  */
412                 rv = dlm_posix_unlock(lockspace, number, file, fl);
413                 break;
414         default:
415                 break;
416         }
417
418         return rv;
419 }
420 EXPORT_SYMBOL_GPL(dlm_posix_cancel);
421
422 int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
423                   struct file_lock *fl)
424 {
425         struct dlm_ls *ls;
426         struct plock_op *op;
427         int rv;
428
429         ls = dlm_find_lockspace_local(lockspace);
430         if (!ls)
431                 return -EINVAL;
432
433         op = kzalloc(sizeof(*op), GFP_NOFS);
434         if (!op) {
435                 rv = -ENOMEM;
436                 goto out;
437         }
438
439         op->info.optype         = DLM_PLOCK_OP_GET;
440         op->info.pid            = fl->c.flc_pid;
441         op->info.ex             = lock_is_write(fl);
442         op->info.fsid           = ls->ls_global_id;
443         op->info.number         = number;
444         op->info.start          = fl->fl_start;
445         op->info.end            = fl->fl_end;
446         op->info.owner = (__u64)(long) fl->c.flc_owner;
447
448         send_op(op);
449         wait_event(recv_wq, (op->done != 0));
450
451         WARN_ON(!list_empty(&op->list));
452
453         /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
454            -ENOENT if there are no locks on the file */
455
456         rv = op->info.rv;
457
458         fl->c.flc_type = F_UNLCK;
459         if (rv == -ENOENT)
460                 rv = 0;
461         else if (rv > 0) {
462                 locks_init_lock(fl);
463                 fl->c.flc_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
464                 fl->c.flc_flags = FL_POSIX;
465                 fl->c.flc_pid = op->info.pid;
466                 if (op->info.nodeid != dlm_our_nodeid())
467                         fl->c.flc_pid = -fl->c.flc_pid;
468                 fl->fl_start = op->info.start;
469                 fl->fl_end = op->info.end;
470                 rv = 0;
471         }
472
473         dlm_release_plock_op(op);
474 out:
475         dlm_put_lockspace(ls);
476         return rv;
477 }
478 EXPORT_SYMBOL_GPL(dlm_posix_get);
479
480 /* a read copies out one plock request from the send list */
481 static ssize_t dev_read(struct file *file, char __user *u, size_t count,
482                         loff_t *ppos)
483 {
484         struct dlm_plock_info info;
485         struct plock_op *op = NULL;
486
487         if (count < sizeof(info))
488                 return -EINVAL;
489
490         spin_lock(&ops_lock);
491         if (!list_empty(&send_list)) {
492                 op = list_first_entry(&send_list, struct plock_op, list);
493                 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
494                         list_del(&op->list);
495                 else
496                         list_move_tail(&op->list, &recv_list);
497                 memcpy(&info, &op->info, sizeof(info));
498         }
499         spin_unlock(&ops_lock);
500
501         if (!op)
502                 return -EAGAIN;
503
504         trace_dlm_plock_read(&info);
505
506         /* there is no need to get a reply from userspace for unlocks
507            that were generated by the vfs cleaning up for a close
508            (the process did not make an unlock call). */
509
510         if (op->info.flags & DLM_PLOCK_FL_CLOSE)
511                 dlm_release_plock_op(op);
512
513         if (copy_to_user(u, &info, sizeof(info)))
514                 return -EFAULT;
515         return sizeof(info);
516 }
517
518 /* a write copies in one plock result that should match a plock_op
519    on the recv list */
520 static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
521                          loff_t *ppos)
522 {
523         struct plock_op *op = NULL, *iter;
524         struct dlm_plock_info info;
525         int do_callback = 0;
526
527         if (count != sizeof(info))
528                 return -EINVAL;
529
530         if (copy_from_user(&info, u, sizeof(info)))
531                 return -EFAULT;
532
533         trace_dlm_plock_write(&info);
534
535         if (check_version(&info))
536                 return -EINVAL;
537
538         /*
539          * The results for waiting ops (SETLKW) can be returned in any
540          * order, so match all fields to find the op.  The results for
541          * non-waiting ops are returned in the order that they were sent
542          * to userspace, so match the result with the first non-waiting op.
543          */
544         spin_lock(&ops_lock);
545         if (info.wait) {
546                 op = plock_lookup_waiter(&info);
547         } else {
548                 list_for_each_entry(iter, &recv_list, list) {
549                         if (!iter->info.wait &&
550                             iter->info.fsid == info.fsid) {
551                                 op = iter;
552                                 break;
553                         }
554                 }
555         }
556
557         if (op) {
558                 /* Sanity check that op and info match. */
559                 if (info.wait)
560                         WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
561                 else
562                         WARN_ON(op->info.number != info.number ||
563                                 op->info.owner != info.owner ||
564                                 op->info.optype != info.optype);
565
566                 list_del_init(&op->list);
567                 memcpy(&op->info, &info, sizeof(info));
568                 if (op->data)
569                         do_callback = 1;
570                 else
571                         op->done = 1;
572         }
573         spin_unlock(&ops_lock);
574
575         if (op) {
576                 if (do_callback)
577                         dlm_plock_callback(op);
578                 else
579                         wake_up(&recv_wq);
580         } else
581                 pr_debug("%s: no op %x %llx", __func__,
582                          info.fsid, (unsigned long long)info.number);
583         return count;
584 }
585
586 static __poll_t dev_poll(struct file *file, poll_table *wait)
587 {
588         __poll_t mask = 0;
589
590         poll_wait(file, &send_wq, wait);
591
592         spin_lock(&ops_lock);
593         if (!list_empty(&send_list))
594                 mask = EPOLLIN | EPOLLRDNORM;
595         spin_unlock(&ops_lock);
596
597         return mask;
598 }
599
600 static const struct file_operations dev_fops = {
601         .read    = dev_read,
602         .write   = dev_write,
603         .poll    = dev_poll,
604         .owner   = THIS_MODULE,
605         .llseek  = noop_llseek,
606 };
607
608 static struct miscdevice plock_dev_misc = {
609         .minor = MISC_DYNAMIC_MINOR,
610         .name = DLM_PLOCK_MISC_NAME,
611         .fops = &dev_fops
612 };
613
614 int dlm_plock_init(void)
615 {
616         int rv;
617
618         rv = misc_register(&plock_dev_misc);
619         if (rv)
620                 log_print("dlm_plock_init: misc_register failed %d", rv);
621         return rv;
622 }
623
/*
 * Unregister the plock misc device and warn if any op is still sitting on
 * send_list or recv_list.
 */
void dlm_plock_exit(void)
{
        misc_deregister(&plock_dev_misc);
        WARN_ON(!list_empty(&send_list));
        WARN_ON(!list_empty(&recv_list));
}
630