GNU Linux-libre 4.9.317-gnu1
[releases.git] / fs / orangefs / devorangefs-req.c
1 /*
2  * (C) 2001 Clemson University and The University of Chicago
3  *
4  * Changes by Acxiom Corporation to add protocol version to kernel
5  * communication, Copyright Acxiom Corporation, 2005.
6  *
7  * See COPYING in top-level directory.
8  */
9
10 #include "protocol.h"
11 #include "orangefs-kernel.h"
12 #include "orangefs-dev-proto.h"
13 #include "orangefs-bufmap.h"
14 #include "orangefs-debugfs.h"
15
16 #include <linux/debugfs.h>
17 #include <linux/slab.h>
18
19 /* this file implements the /dev/pvfs2-req device node */
20
/*
 * Protocol version reported by the userspace client in the header of its
 * writes.  Zero until the first accepted write after the device is opened,
 * and reset to zero again when the device is released.
 */
uint32_t orangefs_userspace_version;

/*
 * Open state of the request device: 0 when closed, 1 while a client has it
 * open, and temporarily -1 while orangefs_devreq_release() is tearing down.
 */
static int open_access_count;

/*
 * Taken by open, release and is_daemon_in_service() around their accesses
 * to open_access_count.
 */
static DEFINE_MUTEX(devreq_mutex);
26
/*
 * Log a multi-line explanation that the request device is already open,
 * including the current value of open_access_count.
 */
#define DUMP_DEVICE_ERROR() \
do { \
        gossip_err( \
            "*****************************************************\n"); \
        gossip_err( \
            "ORANGEFS Device Error:  You cannot open the device file "); \
        gossip_err( \
            "\n/dev/%s more than once.  Please make sure that\nthere " \
            "are no ", \
            ORANGEFS_REQDEVICE_NAME); \
        gossip_err( \
            "instances of a program using this device\ncurrently " \
            "running. (You must verify this!)\n"); \
        gossip_err( \
            "For example, you can use the lsof program as follows:\n"); \
        gossip_err( \
            "'lsof | grep %s' (run this as root)\n", \
            ORANGEFS_REQDEVICE_NAME); \
        gossip_err( \
            "  open_access_count = %d\n", \
            open_access_count); \
        gossip_err( \
            "*****************************************************\n"); \
} while (0)
41
42 static int hash_func(__u64 tag, int table_size)
43 {
44         return do_div(tag, (unsigned int)table_size);
45 }
46
47 static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op)
48 {
49         int index = hash_func(op->tag, hash_table_size);
50
51         list_add_tail(&op->list, &orangefs_htable_ops_in_progress[index]);
52 }
53
54 /*
55  * find the op with this tag and remove it from the in progress
56  * hash table.
57  */
58 static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
59 {
60         struct orangefs_kernel_op_s *op, *next;
61         int index;
62
63         index = hash_func(tag, hash_table_size);
64
65         spin_lock(&orangefs_htable_ops_in_progress_lock);
66         list_for_each_entry_safe(op,
67                                  next,
68                                  &orangefs_htable_ops_in_progress[index],
69                                  list) {
70                 if (op->tag == tag && !op_state_purged(op) &&
71                     !op_state_given_up(op)) {
72                         list_del_init(&op->list);
73                         spin_unlock(&orangefs_htable_ops_in_progress_lock);
74                         return op;
75                 }
76         }
77
78         spin_unlock(&orangefs_htable_ops_in_progress_lock);
79         return NULL;
80 }
81
82 /* Returns whether any FS are still pending remounted */
83 static int mark_all_pending_mounts(void)
84 {
85         int unmounted = 1;
86         struct orangefs_sb_info_s *orangefs_sb = NULL;
87
88         spin_lock(&orangefs_superblocks_lock);
89         list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
90                 /* All of these file system require a remount */
91                 orangefs_sb->mount_pending = 1;
92                 unmounted = 0;
93         }
94         spin_unlock(&orangefs_superblocks_lock);
95         return unmounted;
96 }
97
98 /*
99  * Determine if a given file system needs to be remounted or not
100  *  Returns -1 on error
101  *           0 if already mounted
102  *           1 if needs remount
103  */
104 static int fs_mount_pending(__s32 fsid)
105 {
106         int mount_pending = -1;
107         struct orangefs_sb_info_s *orangefs_sb = NULL;
108
109         spin_lock(&orangefs_superblocks_lock);
110         list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
111                 if (orangefs_sb->fs_id == fsid) {
112                         mount_pending = orangefs_sb->mount_pending;
113                         break;
114                 }
115         }
116         spin_unlock(&orangefs_superblocks_lock);
117         return mount_pending;
118 }
119
120 static int orangefs_devreq_open(struct inode *inode, struct file *file)
121 {
122         int ret = -EINVAL;
123
124         /* in order to ensure that the filesystem driver sees correct UIDs */
125         if (file->f_cred->user_ns != &init_user_ns) {
126                 gossip_err("%s: device cannot be opened outside init_user_ns\n",
127                            __func__);
128                 goto out;
129         }
130
131         if (!(file->f_flags & O_NONBLOCK)) {
132                 gossip_err("%s: device cannot be opened in blocking mode\n",
133                            __func__);
134                 goto out;
135         }
136         ret = -EACCES;
137         gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n");
138         mutex_lock(&devreq_mutex);
139
140         if (open_access_count == 0) {
141                 open_access_count = 1;
142                 ret = 0;
143         } else {
144                 DUMP_DEVICE_ERROR();
145         }
146         mutex_unlock(&devreq_mutex);
147
148 out:
149
150         gossip_debug(GOSSIP_DEV_DEBUG,
151                      "pvfs2-client-core: open device complete (ret = %d)\n",
152                      ret);
153         return ret;
154 }
155
156 /* Function for read() callers into the device */
157 static ssize_t orangefs_devreq_read(struct file *file,
158                                  char __user *buf,
159                                  size_t count, loff_t *offset)
160 {
161         struct orangefs_kernel_op_s *op, *temp;
162         __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
163         static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
164         struct orangefs_kernel_op_s *cur_op;
165         unsigned long ret;
166
167         /* We do not support blocking IO. */
168         if (!(file->f_flags & O_NONBLOCK)) {
169                 gossip_err("%s: blocking read from client-core.\n",
170                            __func__);
171                 return -EINVAL;
172         }
173
174         /*
175          * The client will do an ioctl to find MAX_DEV_REQ_UPSIZE, then
176          * always read with that size buffer.
177          */
178         if (count != MAX_DEV_REQ_UPSIZE) {
179                 gossip_err("orangefs: client-core tried to read wrong size\n");
180                 return -EINVAL;
181         }
182
183 restart:
184         cur_op = NULL;
185         /* Get next op (if any) from top of list. */
186         spin_lock(&orangefs_request_list_lock);
187         list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
188                 __s32 fsid;
189                 /* This lock is held past the end of the loop when we break. */
190                 spin_lock(&op->lock);
191                 if (unlikely(op_state_purged(op) || op_state_given_up(op))) {
192                         spin_unlock(&op->lock);
193                         continue;
194                 }
195
196                 fsid = fsid_of_op(op);
197                 if (fsid != ORANGEFS_FS_ID_NULL) {
198                         int ret;
199                         /* Skip ops whose filesystem needs to be mounted. */
200                         ret = fs_mount_pending(fsid);
201                         if (ret == 1) {
202                                 gossip_debug(GOSSIP_DEV_DEBUG,
203                                     "%s: mount pending, skipping op tag "
204                                     "%llu %s\n",
205                                     __func__,
206                                     llu(op->tag),
207                                     get_opname_string(op));
208                                 spin_unlock(&op->lock);
209                                 continue;
210                         /*
211                          * Skip ops whose filesystem we don't know about unless
212                          * it is being mounted or unmounted.  It is possible for
213                          * a filesystem we don't know about to be unmounted if
214                          * it fails to mount in the kernel after userspace has
215                          * been sent the mount request.
216                          */
217                         /* XXX: is there a better way to detect this? */
218                         } else if (ret == -1 &&
219                                    !(op->upcall.type ==
220                                         ORANGEFS_VFS_OP_FS_MOUNT ||
221                                      op->upcall.type ==
222                                         ORANGEFS_VFS_OP_GETATTR ||
223                                      op->upcall.type ==
224                                         ORANGEFS_VFS_OP_FS_UMOUNT)) {
225                                 gossip_debug(GOSSIP_DEV_DEBUG,
226                                     "orangefs: skipping op tag %llu %s\n",
227                                     llu(op->tag), get_opname_string(op));
228                                 gossip_err(
229                                     "orangefs: ERROR: fs_mount_pending %d\n",
230                                     fsid);
231                                 spin_unlock(&op->lock);
232                                 continue;
233                         }
234                 }
235                 /*
236                  * Either this op does not pertain to a filesystem, is mounting
237                  * a filesystem, or pertains to a mounted filesystem. Let it
238                  * through.
239                  */
240                 cur_op = op;
241                 break;
242         }
243
244         /*
245          * At this point we either have a valid op and can continue or have not
246          * found an op and must ask the client to try again later.
247          */
248         if (!cur_op) {
249                 spin_unlock(&orangefs_request_list_lock);
250                 return -EAGAIN;
251         }
252
253         gossip_debug(GOSSIP_DEV_DEBUG, "%s: reading op tag %llu %s\n",
254                      __func__,
255                      llu(cur_op->tag),
256                      get_opname_string(cur_op));
257
258         /*
259          * Such an op should never be on the list in the first place. If so, we
260          * will abort.
261          */
262         if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
263                 gossip_err("orangefs: ERROR: Current op already queued.\n");
264                 list_del_init(&cur_op->list);
265                 spin_unlock(&cur_op->lock);
266                 spin_unlock(&orangefs_request_list_lock);
267                 return -EAGAIN;
268         }
269
270         list_del_init(&cur_op->list);
271         spin_unlock(&orangefs_request_list_lock);
272
273         spin_unlock(&cur_op->lock);
274
275         /* Push the upcall out. */
276         ret = copy_to_user(buf, &proto_ver, sizeof(__s32));
277         if (ret != 0)
278                 goto error;
279         ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32));
280         if (ret != 0)
281                 goto error;
282         ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64));
283         if (ret != 0)
284                 goto error;
285         ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall,
286                            sizeof(struct orangefs_upcall_s));
287         if (ret != 0)
288                 goto error;
289
290         spin_lock(&orangefs_htable_ops_in_progress_lock);
291         spin_lock(&cur_op->lock);
292         if (unlikely(op_state_given_up(cur_op))) {
293                 spin_unlock(&cur_op->lock);
294                 spin_unlock(&orangefs_htable_ops_in_progress_lock);
295                 complete(&cur_op->waitq);
296                 goto restart;
297         }
298
299         /*
300          * Set the operation to be in progress and move it between lists since
301          * it has been sent to the client.
302          */
303         set_op_state_inprogress(cur_op);
304         gossip_debug(GOSSIP_DEV_DEBUG,
305                      "%s: 1 op:%s: op_state:%d: process:%s:\n",
306                      __func__,
307                      get_opname_string(cur_op),
308                      cur_op->op_state,
309                      current->comm);
310         orangefs_devreq_add_op(cur_op);
311         spin_unlock(&cur_op->lock);
312         spin_unlock(&orangefs_htable_ops_in_progress_lock);
313
314         /* The client only asks to read one size buffer. */
315         return MAX_DEV_REQ_UPSIZE;
316 error:
317         /*
318          * We were unable to copy the op data to the client. Put the op back in
319          * list. If client has crashed, the op will be purged later when the
320          * device is released.
321          */
322         gossip_err("orangefs: Failed to copy data to user space\n");
323         spin_lock(&orangefs_request_list_lock);
324         spin_lock(&cur_op->lock);
325         if (likely(!op_state_given_up(cur_op))) {
326                 set_op_state_waiting(cur_op);
327                 gossip_debug(GOSSIP_DEV_DEBUG,
328                              "%s: 2 op:%s: op_state:%d: process:%s:\n",
329                              __func__,
330                              get_opname_string(cur_op),
331                              cur_op->op_state,
332                              current->comm);
333                 list_add(&cur_op->list, &orangefs_request_list);
334                 spin_unlock(&cur_op->lock);
335         } else {
336                 spin_unlock(&cur_op->lock);
337                 complete(&cur_op->waitq);
338         }
339         spin_unlock(&orangefs_request_list_lock);
340         return -EFAULT;
341 }
342
343 /*
344  * Function for writev() callers into the device.
345  *
346  * Userspace should have written:
347  *  - __u32 version
348  *  - __u32 magic
349  *  - __u64 tag
350  *  - struct orangefs_downcall_s
351  *  - trailer buffer (in the case of READDIR operations)
352  */
353 static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
354                                       struct iov_iter *iter)
355 {
356         ssize_t ret;
357         struct orangefs_kernel_op_s *op = NULL;
358         struct {
359                 __u32 version;
360                 __u32 magic;
361                 __u64 tag;
362         } head;
363         int total = ret = iov_iter_count(iter);
364         int n;
365         int downcall_size = sizeof(struct orangefs_downcall_s);
366         int head_size = sizeof(head);
367
368         gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n",
369                      __func__,
370                      total,
371                      ret);
372
373         if (total < MAX_DEV_REQ_DOWNSIZE) {
374                 gossip_err("%s: total:%d: must be at least:%u:\n",
375                            __func__,
376                            total,
377                            (unsigned int) MAX_DEV_REQ_DOWNSIZE);
378                 return -EFAULT;
379         }
380      
381         n = copy_from_iter(&head, head_size, iter);
382         if (n < head_size) {
383                 gossip_err("%s: failed to copy head.\n", __func__);
384                 return -EFAULT;
385         }
386
387         if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) {
388                 gossip_err("%s: userspace claims version"
389                            "%d, minimum version required: %d.\n",
390                            __func__,
391                            head.version,
392                            ORANGEFS_MINIMUM_USERSPACE_VERSION);
393                 return -EPROTO;
394         }
395
396         if (head.magic != ORANGEFS_DEVREQ_MAGIC) {
397                 gossip_err("Error: Device magic number does not match.\n");
398                 return -EPROTO;
399         }
400
401         if (!orangefs_userspace_version) {
402                 orangefs_userspace_version = head.version;
403         } else if (orangefs_userspace_version != head.version) {
404                 gossip_err("Error: userspace version changes\n");
405                 return -EPROTO;
406         }
407
408         /* remove the op from the in progress hash table */
409         op = orangefs_devreq_remove_op(head.tag);
410         if (!op) {
411                 gossip_debug(GOSSIP_DEV_DEBUG,
412                              "%s: No one's waiting for tag %llu\n",
413                              __func__, llu(head.tag));
414                 return ret;
415         }
416
417         n = copy_from_iter(&op->downcall, downcall_size, iter);
418         if (n != downcall_size) {
419                 gossip_err("%s: failed to copy downcall.\n", __func__);
420                 goto Efault;
421         }
422
423         if (op->downcall.status)
424                 goto wakeup;
425
426         /*
427          * We've successfully peeled off the head and the downcall. 
428          * Something has gone awry if total doesn't equal the
429          * sum of head_size, downcall_size and trailer_size.
430          */
431         if ((head_size + downcall_size + op->downcall.trailer_size) != total) {
432                 gossip_err("%s: funky write, head_size:%d"
433                            ": downcall_size:%d: trailer_size:%lld"
434                            ": total size:%d:\n",
435                            __func__,
436                            head_size,
437                            downcall_size,
438                            op->downcall.trailer_size,
439                            total);
440                 goto Efault;
441         }
442
443         /* Only READDIR operations should have trailers. */
444         if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) &&
445             (op->downcall.trailer_size != 0)) {
446                 gossip_err("%s: %x operation with trailer.",
447                            __func__,
448                            op->downcall.type);
449                 goto Efault;
450         }
451
452         /* READDIR operations should always have trailers. */
453         if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) &&
454             (op->downcall.trailer_size == 0)) {
455                 gossip_err("%s: %x operation with no trailer.",
456                            __func__,
457                            op->downcall.type);
458                 goto Efault;
459         }
460
461         if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
462                 goto wakeup;
463
464         op->downcall.trailer_buf =
465                 vmalloc(op->downcall.trailer_size);
466         if (op->downcall.trailer_buf == NULL) {
467                 gossip_err("%s: failed trailer vmalloc.\n",
468                            __func__);
469                 goto Enomem;
470         }
471         memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
472         n = copy_from_iter(op->downcall.trailer_buf,
473                            op->downcall.trailer_size,
474                            iter);
475         if (n != op->downcall.trailer_size) {
476                 gossip_err("%s: failed to copy trailer.\n", __func__);
477                 vfree(op->downcall.trailer_buf);
478                 goto Efault;
479         }
480
481 wakeup:
482         /*
483          * Return to vfs waitqueue, and back to service_operation
484          * through wait_for_matching_downcall. 
485          */
486         spin_lock(&op->lock);
487         if (unlikely(op_is_cancel(op))) {
488                 spin_unlock(&op->lock);
489                 put_cancel(op);
490         } else if (unlikely(op_state_given_up(op))) {
491                 spin_unlock(&op->lock);
492                 complete(&op->waitq);
493         } else {
494                 set_op_state_serviced(op);
495                 gossip_debug(GOSSIP_DEV_DEBUG,
496                              "%s: op:%s: op_state:%d: process:%s:\n",
497                              __func__,
498                              get_opname_string(op),
499                              op->op_state,
500                              current->comm);
501                 spin_unlock(&op->lock);
502         }
503         return ret;
504
505 Efault:
506         op->downcall.status = -(ORANGEFS_ERROR_BIT | 9);
507         ret = -EFAULT;
508         goto wakeup;
509
510 Enomem:
511         op->downcall.status = -(ORANGEFS_ERROR_BIT | 8);
512         ret = -ENOMEM;
513         goto wakeup;
514 }
515
516 /*
517  * NOTE: gets called when the last reference to this device is dropped.
518  * Using the open_access_count variable, we enforce a reference count
519  * on this file so that it can be opened by only one process at a time.
520  * the devreq_mutex is used to make sure all i/o has completed
521  * before we call orangefs_bufmap_finalize, and similar such tricky
522  * situations
523  */
524 static int orangefs_devreq_release(struct inode *inode, struct file *file)
525 {
526         int unmounted = 0;
527
528         gossip_debug(GOSSIP_DEV_DEBUG,
529                      "%s:pvfs2-client-core: exiting, closing device\n",
530                      __func__);
531
532         mutex_lock(&devreq_mutex);
533         orangefs_bufmap_finalize();
534
535         open_access_count = -1;
536
537         unmounted = mark_all_pending_mounts();
538         gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n",
539                      (unmounted ? "UNMOUNTED" : "MOUNTED"));
540
541         purge_waiting_ops();
542         purge_inprogress_ops();
543
544         orangefs_bufmap_run_down();
545
546         gossip_debug(GOSSIP_DEV_DEBUG,
547                      "pvfs2-client-core: device close complete\n");
548         open_access_count = 0;
549         orangefs_userspace_version = 0;
550         mutex_unlock(&devreq_mutex);
551         return 0;
552 }
553
554 int is_daemon_in_service(void)
555 {
556         int in_service;
557
558         /*
559          * What this function does is checks if client-core is alive
560          * based on the access count we maintain on the device.
561          */
562         mutex_lock(&devreq_mutex);
563         in_service = open_access_count == 1 ? 0 : -EIO;
564         mutex_unlock(&devreq_mutex);
565         return in_service;
566 }
567
568 bool __is_daemon_in_service(void)
569 {
570         return open_access_count == 1;
571 }
572
573 static inline long check_ioctl_command(unsigned int command)
574 {
575         /* Check for valid ioctl codes */
576         if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) {
577                 gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
578                         command,
579                         _IOC_TYPE(command),
580                         ORANGEFS_DEV_MAGIC);
581                 return -EINVAL;
582         }
583         /* and valid ioctl commands */
584         if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
585                 gossip_err("Invalid ioctl command number [%d >= %d]\n",
586                            _IOC_NR(command), ORANGEFS_DEV_MAXNR);
587                 return -ENOIOCTLCMD;
588         }
589         return 0;
590 }
591
592 static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
593 {
594         static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
595         static __s32 max_up_size = MAX_DEV_REQ_UPSIZE;
596         static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE;
597         struct ORANGEFS_dev_map_desc user_desc;
598         int ret = 0;
599         int upstream_kmod = 1;
600         struct orangefs_sb_info_s *orangefs_sb;
601
602         /* mtmoore: add locking here */
603
604         switch (command) {
605         case ORANGEFS_DEV_GET_MAGIC:
606                 return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
607                         -EIO :
608                         0);
609         case ORANGEFS_DEV_GET_MAX_UPSIZE:
610                 return ((put_user(max_up_size,
611                                   (__s32 __user *) arg) == -EFAULT) ?
612                                         -EIO :
613                                         0);
614         case ORANGEFS_DEV_GET_MAX_DOWNSIZE:
615                 return ((put_user(max_down_size,
616                                   (__s32 __user *) arg) == -EFAULT) ?
617                                         -EIO :
618                                         0);
619         case ORANGEFS_DEV_MAP:
620                 ret = copy_from_user(&user_desc,
621                                      (struct ORANGEFS_dev_map_desc __user *)
622                                      arg,
623                                      sizeof(struct ORANGEFS_dev_map_desc));
624                 /* WTF -EIO and not -EFAULT? */
625                 return ret ? -EIO : orangefs_bufmap_initialize(&user_desc);
626         case ORANGEFS_DEV_REMOUNT_ALL:
627                 gossip_debug(GOSSIP_DEV_DEBUG,
628                              "%s: got ORANGEFS_DEV_REMOUNT_ALL\n",
629                              __func__);
630
631                 /*
632                  * remount all mounted orangefs volumes to regain the lost
633                  * dynamic mount tables (if any) -- NOTE: this is done
634                  * without keeping the superblock list locked due to the
635                  * upcall/downcall waiting.  also, the request mutex is
636                  * used to ensure that no operations will be serviced until
637                  * all of the remounts are serviced (to avoid ops between
638                  * mounts to fail)
639                  */
640                 ret = mutex_lock_interruptible(&orangefs_request_mutex);
641                 if (ret < 0)
642                         return ret;
643                 gossip_debug(GOSSIP_DEV_DEBUG,
644                              "%s: priority remount in progress\n",
645                              __func__);
646                 spin_lock(&orangefs_superblocks_lock);
647                 list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
648                         /*
649                          * We have to drop the spinlock, so entries can be
650                          * removed.  They can't be freed, though, so we just
651                          * keep the forward pointers and zero the back ones -
652                          * that way we can get to the rest of the list.
653                          */
654                         if (!orangefs_sb->list.prev)
655                                 continue;
656                         gossip_debug(GOSSIP_DEV_DEBUG,
657                                      "%s: Remounting SB %p\n",
658                                      __func__,
659                                      orangefs_sb);
660
661                         spin_unlock(&orangefs_superblocks_lock);
662                         ret = orangefs_remount(orangefs_sb);
663                         spin_lock(&orangefs_superblocks_lock);
664                         if (ret) {
665                                 gossip_debug(GOSSIP_DEV_DEBUG,
666                                              "SB %p remount failed\n",
667                                              orangefs_sb);
668                                 break;
669                         }
670                 }
671                 spin_unlock(&orangefs_superblocks_lock);
672                 gossip_debug(GOSSIP_DEV_DEBUG,
673                              "%s: priority remount complete\n",
674                              __func__);
675                 mutex_unlock(&orangefs_request_mutex);
676                 return ret;
677
678         case ORANGEFS_DEV_UPSTREAM:
679                 ret = copy_to_user((void __user *)arg,
680                                     &upstream_kmod,
681                                     sizeof(upstream_kmod));
682
683                 if (ret != 0)
684                         return -EIO;
685                 else
686                         return ret;
687
688         case ORANGEFS_DEV_CLIENT_MASK:
689                 return orangefs_debugfs_new_client_mask((void __user *)arg);
690         case ORANGEFS_DEV_CLIENT_STRING:
691                 return orangefs_debugfs_new_client_string((void __user *)arg);
692         case ORANGEFS_DEV_DEBUG:
693                 return orangefs_debugfs_new_debug((void __user *)arg);
694         default:
695                 return -ENOIOCTLCMD;
696         }
697         return -ENOIOCTLCMD;
698 }
699
/*
 * unlocked_ioctl handler: validate the command word, then dispatch it.
 * The result of either step is passed through an int cast before being
 * returned.
 */
static long orangefs_devreq_ioctl(struct file *file,
                               unsigned int command, unsigned long arg)
{
        long status = check_ioctl_command(command);

        /* Only properly constructed commands are dispatched. */
        if (status >= 0)
                status = dispatch_ioctl_command(command, arg);

        return (int)status;
}
712
713 #ifdef CONFIG_COMPAT            /* CONFIG_COMPAT is in .config */
714
/*
 * Compat structure for the ORANGEFS_DEV_MAP ioctl: the descriptor as a
 * 32-bit userspace process passes it.  translate_dev_map26() converts it to
 * struct ORANGEFS_dev_map_desc field by field.
 */
struct ORANGEFS_dev_map_desc32 {
        compat_uptr_t ptr;      /* 32-bit user pointer, widened by compat_ptr() */
        __s32 total_size;       /* copied unchanged to the native descriptor */
        __s32 size;             /* copied unchanged to the native descriptor */
        __s32 count;            /* copied unchanged to the native descriptor */
};
722
723 static unsigned long translate_dev_map26(unsigned long args, long *error)
724 {
725         struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args;
726         /*
727          * Depending on the architecture, allocate some space on the
728          * user-call-stack based on our expected layout.
729          */
730         struct ORANGEFS_dev_map_desc __user *p =
731             compat_alloc_user_space(sizeof(*p));
732         compat_uptr_t addr;
733
734         *error = 0;
735         /* get the ptr from the 32 bit user-space */
736         if (get_user(addr, &p32->ptr))
737                 goto err;
738         /* try to put that into a 64-bit layout */
739         if (put_user(compat_ptr(addr), &p->ptr))
740                 goto err;
741         /* copy the remaining fields */
742         if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
743                 goto err;
744         if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
745                 goto err;
746         if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
747                 goto err;
748         return (unsigned long)p;
749 err:
750         *error = -EFAULT;
751         return 0;
752 }
753
754 /*
755  * 32 bit user-space apps' ioctl handlers when kernel modules
756  * is compiled as a 64 bit one
757  */
758 static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
759                                       unsigned long args)
760 {
761         long ret;
762         unsigned long arg = args;
763
764         /* Check for properly constructed commands */
765         ret = check_ioctl_command(cmd);
766         if (ret < 0)
767                 return ret;
768         if (cmd == ORANGEFS_DEV_MAP) {
769                 /*
770                  * convert the arguments to what we expect internally
771                  * in kernel space
772                  */
773                 arg = translate_dev_map26(args, &ret);
774                 if (ret < 0) {
775                         gossip_err("Could not translate dev map\n");
776                         return ret;
777                 }
778         }
779         /* no other ioctl requires translation */
780         return dispatch_ioctl_command(cmd, arg);
781 }
782
783 #endif /* CONFIG_COMPAT is in .config */
784
/*
 * The character device major number returned by register_chrdev() in
 * orangefs_dev_init(); passed back to unregister_chrdev() on cleanup.
 */
static int orangefs_dev_major;
787
788 /*
789  * Initialize orangefs device specific state:
790  * Must be called at module load time only
791  */
792 int orangefs_dev_init(void)
793 {
794         /* register orangefs-req device  */
795         orangefs_dev_major = register_chrdev(0,
796                                           ORANGEFS_REQDEVICE_NAME,
797                                           &orangefs_devreq_file_operations);
798         if (orangefs_dev_major < 0) {
799                 gossip_debug(GOSSIP_DEV_DEBUG,
800                              "Failed to register /dev/%s (error %d)\n",
801                              ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
802                 return orangefs_dev_major;
803         }
804
805         gossip_debug(GOSSIP_DEV_DEBUG,
806                      "*** /dev/%s character device registered ***\n",
807                      ORANGEFS_REQDEVICE_NAME);
808         gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
809                      ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
810         return 0;
811 }
812
813 void orangefs_dev_cleanup(void)
814 {
815         unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME);
816         gossip_debug(GOSSIP_DEV_DEBUG,
817                      "*** /dev/%s character device unregistered ***\n",
818                      ORANGEFS_REQDEVICE_NAME);
819 }
820
821 static unsigned int orangefs_devreq_poll(struct file *file,
822                                       struct poll_table_struct *poll_table)
823 {
824         int poll_revent_mask = 0;
825
826         poll_wait(file, &orangefs_request_list_waitq, poll_table);
827
828         if (!list_empty(&orangefs_request_list))
829                 poll_revent_mask |= POLL_IN;
830         return poll_revent_mask;
831 }
832
833 const struct file_operations orangefs_devreq_file_operations = {
834         .owner = THIS_MODULE,
835         .read = orangefs_devreq_read,
836         .write_iter = orangefs_devreq_write_iter,
837         .open = orangefs_devreq_open,
838         .release = orangefs_devreq_release,
839         .unlocked_ioctl = orangefs_devreq_ioctl,
840
841 #ifdef CONFIG_COMPAT            /* CONFIG_COMPAT is in .config */
842         .compat_ioctl = orangefs_devreq_compat_ioctl,
843 #endif
844         .poll = orangefs_devreq_poll
845 };