GNU Linux-libre 4.19.211-gnu1
[releases.git] / fs / orangefs / orangefs-utils.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * (C) 2001 Clemson University and The University of Chicago
4  *
5  * See COPYING in top-level directory.
6  */
7 #include <linux/kernel.h>
8 #include "protocol.h"
9 #include "orangefs-kernel.h"
10 #include "orangefs-dev-proto.h"
11 #include "orangefs-bufmap.h"
12
13 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
14 {
15         __s32 fsid = ORANGEFS_FS_ID_NULL;
16
17         if (op) {
18                 switch (op->upcall.type) {
19                 case ORANGEFS_VFS_OP_FILE_IO:
20                         fsid = op->upcall.req.io.refn.fs_id;
21                         break;
22                 case ORANGEFS_VFS_OP_LOOKUP:
23                         fsid = op->upcall.req.lookup.parent_refn.fs_id;
24                         break;
25                 case ORANGEFS_VFS_OP_CREATE:
26                         fsid = op->upcall.req.create.parent_refn.fs_id;
27                         break;
28                 case ORANGEFS_VFS_OP_GETATTR:
29                         fsid = op->upcall.req.getattr.refn.fs_id;
30                         break;
31                 case ORANGEFS_VFS_OP_REMOVE:
32                         fsid = op->upcall.req.remove.parent_refn.fs_id;
33                         break;
34                 case ORANGEFS_VFS_OP_MKDIR:
35                         fsid = op->upcall.req.mkdir.parent_refn.fs_id;
36                         break;
37                 case ORANGEFS_VFS_OP_READDIR:
38                         fsid = op->upcall.req.readdir.refn.fs_id;
39                         break;
40                 case ORANGEFS_VFS_OP_SETATTR:
41                         fsid = op->upcall.req.setattr.refn.fs_id;
42                         break;
43                 case ORANGEFS_VFS_OP_SYMLINK:
44                         fsid = op->upcall.req.sym.parent_refn.fs_id;
45                         break;
46                 case ORANGEFS_VFS_OP_RENAME:
47                         fsid = op->upcall.req.rename.old_parent_refn.fs_id;
48                         break;
49                 case ORANGEFS_VFS_OP_STATFS:
50                         fsid = op->upcall.req.statfs.fs_id;
51                         break;
52                 case ORANGEFS_VFS_OP_TRUNCATE:
53                         fsid = op->upcall.req.truncate.refn.fs_id;
54                         break;
55                 case ORANGEFS_VFS_OP_RA_FLUSH:
56                         fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
57                         break;
58                 case ORANGEFS_VFS_OP_FS_UMOUNT:
59                         fsid = op->upcall.req.fs_umount.fs_id;
60                         break;
61                 case ORANGEFS_VFS_OP_GETXATTR:
62                         fsid = op->upcall.req.getxattr.refn.fs_id;
63                         break;
64                 case ORANGEFS_VFS_OP_SETXATTR:
65                         fsid = op->upcall.req.setxattr.refn.fs_id;
66                         break;
67                 case ORANGEFS_VFS_OP_LISTXATTR:
68                         fsid = op->upcall.req.listxattr.refn.fs_id;
69                         break;
70                 case ORANGEFS_VFS_OP_REMOVEXATTR:
71                         fsid = op->upcall.req.removexattr.refn.fs_id;
72                         break;
73                 case ORANGEFS_VFS_OP_FSYNC:
74                         fsid = op->upcall.req.fsync.refn.fs_id;
75                         break;
76                 default:
77                         break;
78                 }
79         }
80         return fsid;
81 }
82
83 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
84 {
85         int flags = 0;
86         if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
87                 flags |= S_IMMUTABLE;
88         else
89                 flags &= ~S_IMMUTABLE;
90         if (attrs->flags & ORANGEFS_APPEND_FL)
91                 flags |= S_APPEND;
92         else
93                 flags &= ~S_APPEND;
94         if (attrs->flags & ORANGEFS_NOATIME_FL)
95                 flags |= S_NOATIME;
96         else
97                 flags &= ~S_NOATIME;
98         return flags;
99 }
100
101 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
102 {
103         int perm_mode = 0;
104
105         if (attrs->perms & ORANGEFS_O_EXECUTE)
106                 perm_mode |= S_IXOTH;
107         if (attrs->perms & ORANGEFS_O_WRITE)
108                 perm_mode |= S_IWOTH;
109         if (attrs->perms & ORANGEFS_O_READ)
110                 perm_mode |= S_IROTH;
111
112         if (attrs->perms & ORANGEFS_G_EXECUTE)
113                 perm_mode |= S_IXGRP;
114         if (attrs->perms & ORANGEFS_G_WRITE)
115                 perm_mode |= S_IWGRP;
116         if (attrs->perms & ORANGEFS_G_READ)
117                 perm_mode |= S_IRGRP;
118
119         if (attrs->perms & ORANGEFS_U_EXECUTE)
120                 perm_mode |= S_IXUSR;
121         if (attrs->perms & ORANGEFS_U_WRITE)
122                 perm_mode |= S_IWUSR;
123         if (attrs->perms & ORANGEFS_U_READ)
124                 perm_mode |= S_IRUSR;
125
126         if (attrs->perms & ORANGEFS_G_SGID)
127                 perm_mode |= S_ISGID;
128         if (attrs->perms & ORANGEFS_U_SUID)
129                 perm_mode |= S_ISUID;
130
131         return perm_mode;
132 }
133
134 /*
135  * NOTE: in kernel land, we never use the sys_attr->link_target for
136  * anything, so don't bother copying it into the sys_attr object here.
137  */
138 static inline int copy_attributes_from_inode(struct inode *inode,
139                                              struct ORANGEFS_sys_attr_s *attrs,
140                                              struct iattr *iattr)
141 {
142         umode_t tmp_mode;
143
144         if (!iattr || !inode || !attrs) {
145                 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
146                            "in copy_attributes_from_inode!\n",
147                            iattr,
148                            inode,
149                            attrs);
150                 return -EINVAL;
151         }
152         /*
153          * We need to be careful to only copy the attributes out of the
154          * iattr object that we know are valid.
155          */
156         attrs->mask = 0;
157         if (iattr->ia_valid & ATTR_UID) {
158                 attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
159                 attrs->mask |= ORANGEFS_ATTR_SYS_UID;
160                 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
161         }
162         if (iattr->ia_valid & ATTR_GID) {
163                 attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
164                 attrs->mask |= ORANGEFS_ATTR_SYS_GID;
165                 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
166         }
167
168         if (iattr->ia_valid & ATTR_ATIME) {
169                 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
170                 if (iattr->ia_valid & ATTR_ATIME_SET) {
171                         attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
172                         attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
173                 }
174         }
175         if (iattr->ia_valid & ATTR_MTIME) {
176                 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
177                 if (iattr->ia_valid & ATTR_MTIME_SET) {
178                         attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
179                         attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
180                 }
181         }
182         if (iattr->ia_valid & ATTR_CTIME)
183                 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
184
185         /*
186          * ORANGEFS cannot set size with a setattr operation. Probably not
187          * likely to be requested through the VFS, but just in case, don't
188          * worry about ATTR_SIZE
189          */
190
191         if (iattr->ia_valid & ATTR_MODE) {
192                 tmp_mode = iattr->ia_mode;
193                 if (tmp_mode & (S_ISVTX)) {
194                         if (is_root_handle(inode)) {
195                                 /*
196                                  * allow sticky bit to be set on root (since
197                                  * it shows up that way by default anyhow),
198                                  * but don't show it to the server
199                                  */
200                                 tmp_mode -= S_ISVTX;
201                         } else {
202                                 gossip_debug(GOSSIP_UTILS_DEBUG,
203                                         "%s: setting sticky bit not supported.\n",
204                                         __func__);
205                                 return -EINVAL;
206                         }
207                 }
208
209                 if (tmp_mode & (S_ISUID)) {
210                         gossip_debug(GOSSIP_UTILS_DEBUG,
211                                 "%s: setting setuid bit not supported.\n",
212                                 __func__);
213                         return -EINVAL;
214                 }
215
216                 attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
217                 attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
218         }
219
220         return 0;
221 }
222
223 static int orangefs_inode_type(enum orangefs_ds_type objtype)
224 {
225         if (objtype == ORANGEFS_TYPE_METAFILE)
226                 return S_IFREG;
227         else if (objtype == ORANGEFS_TYPE_DIRECTORY)
228                 return S_IFDIR;
229         else if (objtype == ORANGEFS_TYPE_SYMLINK)
230                 return S_IFLNK;
231         else
232                 return -1;
233 }
234
235 static void orangefs_make_bad_inode(struct inode *inode)
236 {
237         if (is_root_handle(inode)) {
238                 /*
239                  * if this occurs, the pvfs2-client-core was killed but we
240                  * can't afford to lose the inode operations and such
241                  * associated with the root handle in any case.
242                  */
243                 gossip_debug(GOSSIP_UTILS_DEBUG,
244                              "*** NOT making bad root inode %pU\n",
245                              get_khandle_from_ino(inode));
246         } else {
247                 gossip_debug(GOSSIP_UTILS_DEBUG,
248                              "*** making bad inode %pU\n",
249                              get_khandle_from_ino(inode));
250                 make_bad_inode(inode);
251         }
252 }
253
254 static int orangefs_inode_is_stale(struct inode *inode,
255     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
256 {
257         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
258         int type = orangefs_inode_type(attrs->objtype);
259         /*
260          * If the inode type or symlink target have changed then this
261          * inode is stale.
262          */
263         if (type == -1 || !(inode->i_mode & type)) {
264                 orangefs_make_bad_inode(inode);
265                 return 1;
266         }
267         if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
268             link_target, ORANGEFS_NAME_MAX)) {
269                 orangefs_make_bad_inode(inode);
270                 return 1;
271         }
272         return 0;
273 }
274
275 int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
276     u32 request_mask)
277 {
278         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
279         struct orangefs_kernel_op_s *new_op;
280         loff_t inode_size;
281         int ret, type;
282
283         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
284             get_khandle_from_ino(inode));
285
286         if (!new && !bypass) {
287                 /*
288                  * Must have all the attributes in the mask and be within cache
289                  * time.
290                  */
291                 if ((request_mask & orangefs_inode->getattr_mask) ==
292                     request_mask &&
293                     time_before(jiffies, orangefs_inode->getattr_time))
294                         return 0;
295         }
296
297         new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
298         if (!new_op)
299                 return -ENOMEM;
300         new_op->upcall.req.getattr.refn = orangefs_inode->refn;
301         /*
302          * Size is the hardest attribute to get.  The incremental cost of any
303          * other attribute is essentially zero.
304          */
305         if (request_mask & STATX_SIZE || new)
306                 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
307         else
308                 new_op->upcall.req.getattr.mask =
309                     ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
310
311         ret = service_operation(new_op, __func__,
312             get_interruptible_flag(inode));
313         if (ret != 0)
314                 goto out;
315
316         if (!new) {
317                 ret = orangefs_inode_is_stale(inode,
318                     &new_op->downcall.resp.getattr.attributes,
319                     new_op->downcall.resp.getattr.link_target);
320                 if (ret) {
321                         ret = -ESTALE;
322                         goto out;
323                 }
324         }
325
326         type = orangefs_inode_type(new_op->
327             downcall.resp.getattr.attributes.objtype);
328         switch (type) {
329         case S_IFREG:
330                 inode->i_flags = orangefs_inode_flags(&new_op->
331                     downcall.resp.getattr.attributes);
332                 if (request_mask & STATX_SIZE || new) {
333                         inode_size = (loff_t)new_op->
334                             downcall.resp.getattr.attributes.size;
335                         inode->i_size = inode_size;
336                         inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
337                             attributes.blksize);
338                         spin_lock(&inode->i_lock);
339                         inode->i_bytes = inode_size;
340                         inode->i_blocks =
341                             (inode_size + 512 - inode_size % 512)/512;
342                         spin_unlock(&inode->i_lock);
343                 }
344                 break;
345         case S_IFDIR:
346                 if (request_mask & STATX_SIZE || new) {
347                         inode->i_size = PAGE_SIZE;
348                         spin_lock(&inode->i_lock);
349                         inode_set_bytes(inode, inode->i_size);
350                         spin_unlock(&inode->i_lock);
351                 }
352                 set_nlink(inode, 1);
353                 break;
354         case S_IFLNK:
355                 if (new) {
356                         inode->i_size = (loff_t)strlen(new_op->
357                             downcall.resp.getattr.link_target);
358                         ret = strscpy(orangefs_inode->link_target,
359                             new_op->downcall.resp.getattr.link_target,
360                             ORANGEFS_NAME_MAX);
361                         if (ret == -E2BIG) {
362                                 ret = -EIO;
363                                 goto out;
364                         }
365                         inode->i_link = orangefs_inode->link_target;
366                 }
367                 break;
368         /* i.e. -1 */
369         default:
370                 /* XXX: ESTALE?  This is what is done if it is not new. */
371                 orangefs_make_bad_inode(inode);
372                 ret = -ESTALE;
373                 goto out;
374         }
375
376         inode->i_uid = make_kuid(&init_user_ns, new_op->
377             downcall.resp.getattr.attributes.owner);
378         inode->i_gid = make_kgid(&init_user_ns, new_op->
379             downcall.resp.getattr.attributes.group);
380         inode->i_atime.tv_sec = (time64_t)new_op->
381             downcall.resp.getattr.attributes.atime;
382         inode->i_mtime.tv_sec = (time64_t)new_op->
383             downcall.resp.getattr.attributes.mtime;
384         inode->i_ctime.tv_sec = (time64_t)new_op->
385             downcall.resp.getattr.attributes.ctime;
386         inode->i_atime.tv_nsec = 0;
387         inode->i_mtime.tv_nsec = 0;
388         inode->i_ctime.tv_nsec = 0;
389
390         /* special case: mark the root inode as sticky */
391         inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
392             orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
393
394         orangefs_inode->getattr_time = jiffies +
395             orangefs_getattr_timeout_msecs*HZ/1000;
396         if (request_mask & STATX_SIZE || new)
397                 orangefs_inode->getattr_mask = STATX_BASIC_STATS;
398         else
399                 orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
400         ret = 0;
401 out:
402         op_release(new_op);
403         return ret;
404 }
405
406 int orangefs_inode_check_changed(struct inode *inode)
407 {
408         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
409         struct orangefs_kernel_op_s *new_op;
410         int ret;
411
412         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
413             get_khandle_from_ino(inode));
414
415         new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
416         if (!new_op)
417                 return -ENOMEM;
418         new_op->upcall.req.getattr.refn = orangefs_inode->refn;
419         new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
420             ORANGEFS_ATTR_SYS_LNK_TARGET;
421
422         ret = service_operation(new_op, __func__,
423             get_interruptible_flag(inode));
424         if (ret != 0)
425                 goto out;
426
427         ret = orangefs_inode_is_stale(inode,
428             &new_op->downcall.resp.getattr.attributes,
429             new_op->downcall.resp.getattr.link_target);
430 out:
431         op_release(new_op);
432         return ret;
433 }
434
435 /*
436  * issues a orangefs setattr request to make sure the new attribute values
437  * take effect if successful.  returns 0 on success; -errno otherwise
438  */
439 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
440 {
441         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
442         struct orangefs_kernel_op_s *new_op;
443         int ret;
444
445         new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
446         if (!new_op)
447                 return -ENOMEM;
448
449         new_op->upcall.req.setattr.refn = orangefs_inode->refn;
450         ret = copy_attributes_from_inode(inode,
451                        &new_op->upcall.req.setattr.attributes,
452                        iattr);
453         if (ret >= 0) {
454                 ret = service_operation(new_op, __func__,
455                                 get_interruptible_flag(inode));
456
457                 gossip_debug(GOSSIP_UTILS_DEBUG,
458                              "orangefs_inode_setattr: returning %d\n",
459                              ret);
460         }
461
462         op_release(new_op);
463
464         if (ret == 0)
465                 orangefs_inode->getattr_time = jiffies - 1;
466
467         return ret;
468 }
469
/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 */

/*
 * Lookup table from ORANGEFS error number (the array index) to the local
 * errno value.  The order matches include/orangefs-types.h in the
 * OrangeFS source and must not be changed.  The table is only ever read,
 * so it is declared const.
 */
static const int PINT_errno_mapping[] = {
        /*  0 */ 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
        /*  9 */ EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
        /* 17 */ EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
        /* 24 */ ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
        /* 31 */ EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
        /* 38 */ EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
        /* 44 */ ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
        /* 48 */ EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
        /* 53 */ ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
        /* 58 */ EACCES, ECONNRESET, ERANGE
};
488
489 int orangefs_normalize_to_errno(__s32 error_code)
490 {
491         __u32 i;
492
493         /* Success */
494         if (error_code == 0) {
495                 return 0;
496         /*
497          * This shouldn't ever happen. If it does it should be fixed on the
498          * server.
499          */
500         } else if (error_code > 0) {
501                 gossip_err("orangefs: error status received.\n");
502                 gossip_err("orangefs: assuming error code is inverted.\n");
503                 error_code = -error_code;
504         }
505
506         /*
507          * XXX: This is very bad since error codes from ORANGEFS may not be
508          * suitable for return into userspace.
509          */
510
511         /*
512          * Convert ORANGEFS error values into errno values suitable for return
513          * from the kernel.
514          */
515         if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
516                 if (((-error_code) &
517                     (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
518                     ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
519                         /*
520                          * cancellation error codes generally correspond to
521                          * a timeout from the client's perspective
522                          */
523                         error_code = -ETIMEDOUT;
524                 } else {
525                         /* assume a default error code */
526                         gossip_err("%s: bad error code :%d:.\n",
527                                 __func__,
528                                 error_code);
529                         error_code = -EINVAL;
530                 }
531
532         /* Convert ORANGEFS encoded errno values into regular errno values. */
533         } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
534                 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
535                 if (i < ARRAY_SIZE(PINT_errno_mapping))
536                         error_code = -PINT_errno_mapping[i];
537                 else
538                         error_code = -EINVAL;
539
540         /*
541          * Only ORANGEFS protocol error codes should ever come here. Otherwise
542          * there is a bug somewhere.
543          */
544         } else {
545                 gossip_err("%s: unknown error code.\n", __func__);
546                 error_code = -EINVAL;
547         }
548         return error_code;
549 }
550
551 #define NUM_MODES 11
552 __s32 ORANGEFS_util_translate_mode(int mode)
553 {
554         int ret = 0;
555         int i = 0;
556         static int modes[NUM_MODES] = {
557                 S_IXOTH, S_IWOTH, S_IROTH,
558                 S_IXGRP, S_IWGRP, S_IRGRP,
559                 S_IXUSR, S_IWUSR, S_IRUSR,
560                 S_ISGID, S_ISUID
561         };
562         static int orangefs_modes[NUM_MODES] = {
563                 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
564                 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
565                 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
566                 ORANGEFS_G_SGID, ORANGEFS_U_SUID
567         };
568
569         for (i = 0; i < NUM_MODES; i++)
570                 if (mode & modes[i])
571                         ret |= orangefs_modes[i];
572
573         return ret;
574 }
575 #undef NUM_MODES