// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */
8 #include "orangefs-kernel.h"
9 #include "orangefs-dev-proto.h"
10 #include "orangefs-bufmap.h"
12 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
14 __s32 fsid = ORANGEFS_FS_ID_NULL;
17 switch (op->upcall.type) {
18 case ORANGEFS_VFS_OP_FILE_IO:
19 fsid = op->upcall.req.io.refn.fs_id;
21 case ORANGEFS_VFS_OP_LOOKUP:
22 fsid = op->upcall.req.lookup.parent_refn.fs_id;
24 case ORANGEFS_VFS_OP_CREATE:
25 fsid = op->upcall.req.create.parent_refn.fs_id;
27 case ORANGEFS_VFS_OP_GETATTR:
28 fsid = op->upcall.req.getattr.refn.fs_id;
30 case ORANGEFS_VFS_OP_REMOVE:
31 fsid = op->upcall.req.remove.parent_refn.fs_id;
33 case ORANGEFS_VFS_OP_MKDIR:
34 fsid = op->upcall.req.mkdir.parent_refn.fs_id;
36 case ORANGEFS_VFS_OP_READDIR:
37 fsid = op->upcall.req.readdir.refn.fs_id;
39 case ORANGEFS_VFS_OP_SETATTR:
40 fsid = op->upcall.req.setattr.refn.fs_id;
42 case ORANGEFS_VFS_OP_SYMLINK:
43 fsid = op->upcall.req.sym.parent_refn.fs_id;
45 case ORANGEFS_VFS_OP_RENAME:
46 fsid = op->upcall.req.rename.old_parent_refn.fs_id;
48 case ORANGEFS_VFS_OP_STATFS:
49 fsid = op->upcall.req.statfs.fs_id;
51 case ORANGEFS_VFS_OP_TRUNCATE:
52 fsid = op->upcall.req.truncate.refn.fs_id;
54 case ORANGEFS_VFS_OP_RA_FLUSH:
55 fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
57 case ORANGEFS_VFS_OP_FS_UMOUNT:
58 fsid = op->upcall.req.fs_umount.fs_id;
60 case ORANGEFS_VFS_OP_GETXATTR:
61 fsid = op->upcall.req.getxattr.refn.fs_id;
63 case ORANGEFS_VFS_OP_SETXATTR:
64 fsid = op->upcall.req.setxattr.refn.fs_id;
66 case ORANGEFS_VFS_OP_LISTXATTR:
67 fsid = op->upcall.req.listxattr.refn.fs_id;
69 case ORANGEFS_VFS_OP_REMOVEXATTR:
70 fsid = op->upcall.req.removexattr.refn.fs_id;
72 case ORANGEFS_VFS_OP_FSYNC:
73 fsid = op->upcall.req.fsync.refn.fs_id;
82 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
85 if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
88 flags &= ~S_IMMUTABLE;
89 if (attrs->flags & ORANGEFS_APPEND_FL)
93 if (attrs->flags & ORANGEFS_NOATIME_FL)
100 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
104 if (attrs->perms & ORANGEFS_O_EXECUTE)
105 perm_mode |= S_IXOTH;
106 if (attrs->perms & ORANGEFS_O_WRITE)
107 perm_mode |= S_IWOTH;
108 if (attrs->perms & ORANGEFS_O_READ)
109 perm_mode |= S_IROTH;
111 if (attrs->perms & ORANGEFS_G_EXECUTE)
112 perm_mode |= S_IXGRP;
113 if (attrs->perms & ORANGEFS_G_WRITE)
114 perm_mode |= S_IWGRP;
115 if (attrs->perms & ORANGEFS_G_READ)
116 perm_mode |= S_IRGRP;
118 if (attrs->perms & ORANGEFS_U_EXECUTE)
119 perm_mode |= S_IXUSR;
120 if (attrs->perms & ORANGEFS_U_WRITE)
121 perm_mode |= S_IWUSR;
122 if (attrs->perms & ORANGEFS_U_READ)
123 perm_mode |= S_IRUSR;
125 if (attrs->perms & ORANGEFS_G_SGID)
126 perm_mode |= S_ISGID;
127 if (attrs->perms & ORANGEFS_U_SUID)
128 perm_mode |= S_ISUID;
134 * NOTE: in kernel land, we never use the sys_attr->link_target for
135 * anything, so don't bother copying it into the sys_attr object here.
137 static inline int copy_attributes_from_inode(struct inode *inode,
138 struct ORANGEFS_sys_attr_s *attrs,
143 if (!iattr || !inode || !attrs) {
144 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
145 "in copy_attributes_from_inode!\n",
152 * We need to be careful to only copy the attributes out of the
153 * iattr object that we know are valid.
156 if (iattr->ia_valid & ATTR_UID) {
157 attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
158 attrs->mask |= ORANGEFS_ATTR_SYS_UID;
159 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
161 if (iattr->ia_valid & ATTR_GID) {
162 attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
163 attrs->mask |= ORANGEFS_ATTR_SYS_GID;
164 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
167 if (iattr->ia_valid & ATTR_ATIME) {
168 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
169 if (iattr->ia_valid & ATTR_ATIME_SET) {
170 attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
171 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
174 if (iattr->ia_valid & ATTR_MTIME) {
175 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
176 if (iattr->ia_valid & ATTR_MTIME_SET) {
177 attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
178 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
181 if (iattr->ia_valid & ATTR_CTIME)
182 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
185 * ORANGEFS cannot set size with a setattr operation. Probably not likely
186 * to be requested through the VFS, but just in case, don't worry about
190 if (iattr->ia_valid & ATTR_MODE) {
191 tmp_mode = iattr->ia_mode;
192 if (tmp_mode & (S_ISVTX)) {
193 if (is_root_handle(inode)) {
195 * allow sticky bit to be set on root (since
196 * it shows up that way by default anyhow),
197 * but don't show it to the server
201 gossip_debug(GOSSIP_UTILS_DEBUG,
202 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
207 if (tmp_mode & (S_ISUID)) {
208 gossip_debug(GOSSIP_UTILS_DEBUG,
209 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
213 attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
214 attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
220 static int orangefs_inode_type(enum orangefs_ds_type objtype)
222 if (objtype == ORANGEFS_TYPE_METAFILE)
224 else if (objtype == ORANGEFS_TYPE_DIRECTORY)
226 else if (objtype == ORANGEFS_TYPE_SYMLINK)
232 static int orangefs_inode_is_stale(struct inode *inode, int new,
233 struct ORANGEFS_sys_attr_s *attrs, char *link_target)
235 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
236 int type = orangefs_inode_type(attrs->objtype);
239 * If the inode type or symlink target have changed then this
242 if (type == -1 || !(inode->i_mode & type)) {
243 orangefs_make_bad_inode(inode);
246 if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
247 link_target, ORANGEFS_NAME_MAX)) {
248 orangefs_make_bad_inode(inode);
255 int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
258 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
259 struct orangefs_kernel_op_s *new_op;
260 loff_t inode_size, rounded_up_size;
263 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
264 get_khandle_from_ino(inode));
266 if (!new && !bypass) {
268 * Must have all the attributes in the mask and be within cache
271 if ((request_mask & orangefs_inode->getattr_mask) ==
273 time_before(jiffies, orangefs_inode->getattr_time))
277 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
280 new_op->upcall.req.getattr.refn = orangefs_inode->refn;
282 * Size is the hardest attribute to get. The incremental cost of any
283 * other attribute is essentially zero.
285 if (request_mask & STATX_SIZE || new)
286 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
288 new_op->upcall.req.getattr.mask =
289 ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
291 ret = service_operation(new_op, __func__,
292 get_interruptible_flag(inode));
296 type = orangefs_inode_type(new_op->
297 downcall.resp.getattr.attributes.objtype);
298 ret = orangefs_inode_is_stale(inode, new,
299 &new_op->downcall.resp.getattr.attributes,
300 new_op->downcall.resp.getattr.link_target);
308 inode->i_flags = orangefs_inode_flags(&new_op->
309 downcall.resp.getattr.attributes);
310 if (request_mask & STATX_SIZE || new) {
311 inode_size = (loff_t)new_op->
312 downcall.resp.getattr.attributes.size;
314 (inode_size + (4096 - (inode_size % 4096)));
315 inode->i_size = inode_size;
316 orangefs_inode->blksize =
317 new_op->downcall.resp.getattr.attributes.blksize;
318 spin_lock(&inode->i_lock);
319 inode->i_bytes = inode_size;
321 (unsigned long)(rounded_up_size / 512);
322 spin_unlock(&inode->i_lock);
326 if (request_mask & STATX_SIZE || new) {
327 inode->i_size = PAGE_SIZE;
328 orangefs_inode->blksize = i_blocksize(inode);
329 spin_lock(&inode->i_lock);
330 inode_set_bytes(inode, inode->i_size);
331 spin_unlock(&inode->i_lock);
337 inode->i_size = (loff_t)strlen(new_op->
338 downcall.resp.getattr.link_target);
339 orangefs_inode->blksize = i_blocksize(inode);
340 ret = strscpy(orangefs_inode->link_target,
341 new_op->downcall.resp.getattr.link_target,
347 inode->i_link = orangefs_inode->link_target;
352 inode->i_uid = make_kuid(&init_user_ns, new_op->
353 downcall.resp.getattr.attributes.owner);
354 inode->i_gid = make_kgid(&init_user_ns, new_op->
355 downcall.resp.getattr.attributes.group);
356 inode->i_atime.tv_sec = (time64_t)new_op->
357 downcall.resp.getattr.attributes.atime;
358 inode->i_mtime.tv_sec = (time64_t)new_op->
359 downcall.resp.getattr.attributes.mtime;
360 inode->i_ctime.tv_sec = (time64_t)new_op->
361 downcall.resp.getattr.attributes.ctime;
362 inode->i_atime.tv_nsec = 0;
363 inode->i_mtime.tv_nsec = 0;
364 inode->i_ctime.tv_nsec = 0;
366 /* special case: mark the root inode as sticky */
367 inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
368 orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
370 orangefs_inode->getattr_time = jiffies +
371 orangefs_getattr_timeout_msecs*HZ/1000;
372 if (request_mask & STATX_SIZE || new)
373 orangefs_inode->getattr_mask = STATX_BASIC_STATS;
375 orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
382 int orangefs_inode_check_changed(struct inode *inode)
384 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
385 struct orangefs_kernel_op_s *new_op;
388 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
389 get_khandle_from_ino(inode));
391 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
394 new_op->upcall.req.getattr.refn = orangefs_inode->refn;
395 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
396 ORANGEFS_ATTR_SYS_LNK_TARGET;
398 ret = service_operation(new_op, __func__,
399 get_interruptible_flag(inode));
403 ret = orangefs_inode_is_stale(inode, 0,
404 &new_op->downcall.resp.getattr.attributes,
405 new_op->downcall.resp.getattr.link_target);
412 * issues a orangefs setattr request to make sure the new attribute values
413 * take effect if successful. returns 0 on success; -errno otherwise
415 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
417 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
418 struct orangefs_kernel_op_s *new_op;
421 new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
425 new_op->upcall.req.setattr.refn = orangefs_inode->refn;
426 ret = copy_attributes_from_inode(inode,
427 &new_op->upcall.req.setattr.attributes,
430 ret = service_operation(new_op, __func__,
431 get_interruptible_flag(inode));
433 gossip_debug(GOSSIP_UTILS_DEBUG,
434 "orangefs_inode_setattr: returning %d\n",
441 * successful setattr should clear the atime, mtime and
445 ClearAtimeFlag(orangefs_inode);
446 ClearMtimeFlag(orangefs_inode);
447 ClearCtimeFlag(orangefs_inode);
448 ClearModeFlag(orangefs_inode);
449 orangefs_inode->getattr_time = jiffies - 1;
455 int orangefs_flush_inode(struct inode *inode)
458 * If it is a dirty inode, this function gets called.
459 * Gather all the information that needs to be setattr'ed
460 * Right now, this will only be used for mode, atime, mtime
469 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
471 memset(&wbattr, 0, sizeof(wbattr));
474 * check inode flags up front, and clear them if they are set. This
475 * will prevent multiple processes from all trying to flush the same
476 * inode if they call close() simultaneously
478 mtime_flag = MtimeFlag(orangefs_inode);
479 ClearMtimeFlag(orangefs_inode);
480 ctime_flag = CtimeFlag(orangefs_inode);
481 ClearCtimeFlag(orangefs_inode);
482 atime_flag = AtimeFlag(orangefs_inode);
483 ClearAtimeFlag(orangefs_inode);
484 mode_flag = ModeFlag(orangefs_inode);
485 ClearModeFlag(orangefs_inode);
487 /* -- Lazy atime,mtime and ctime update --
488 * Note: all times are dictated by server in the new scheme
489 * and not by the clients
491 * Also mode updates are being handled now..
495 wbattr.ia_valid |= ATTR_MTIME;
497 wbattr.ia_valid |= ATTR_CTIME;
499 wbattr.ia_valid |= ATTR_ATIME;
502 wbattr.ia_mode = inode->i_mode;
503 wbattr.ia_valid |= ATTR_MODE;
506 gossip_debug(GOSSIP_UTILS_DEBUG,
507 "*********** orangefs_flush_inode: %pU "
509 get_khandle_from_ino(inode),
511 if (wbattr.ia_valid == 0) {
512 gossip_debug(GOSSIP_UTILS_DEBUG,
513 "orangefs_flush_inode skipping setattr()\n");
517 gossip_debug(GOSSIP_UTILS_DEBUG,
518 "orangefs_flush_inode (%pU) writing mode %o\n",
519 get_khandle_from_ino(inode),
522 ret = orangefs_inode_setattr(inode, &wbattr);
527 void orangefs_make_bad_inode(struct inode *inode)
529 if (is_root_handle(inode)) {
531 * if this occurs, the pvfs2-client-core was killed but we
532 * can't afford to lose the inode operations and such
533 * associated with the root handle in any case.
535 gossip_debug(GOSSIP_UTILS_DEBUG,
536 "*** NOT making bad root inode %pU\n",
537 get_khandle_from_ino(inode));
539 gossip_debug(GOSSIP_UTILS_DEBUG,
540 "*** making bad inode %pU\n",
541 get_khandle_from_ino(inode));
542 make_bad_inode(inode);
/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 */

/*
 * Lookup table from ORANGEFS error number (the array index) to the local
 * errno value.  It is only ever read, hence const.
 *
 * The order matches include/orangefs-types.h in the OrangeFS source.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
565 int orangefs_normalize_to_errno(__s32 error_code)
570 if (error_code == 0) {
573 * This shouldn't ever happen. If it does it should be fixed on the
576 } else if (error_code > 0) {
577 gossip_err("orangefs: error status receieved.\n");
578 gossip_err("orangefs: assuming error code is inverted.\n");
579 error_code = -error_code;
583 * XXX: This is very bad since error codes from ORANGEFS may not be
584 * suitable for return into userspace.
588 * Convert ORANGEFS error values into errno values suitable for return
591 if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
593 (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
594 ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
596 * cancellation error codes generally correspond to
597 * a timeout from the client's perspective
599 error_code = -ETIMEDOUT;
601 /* assume a default error code */
602 gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
603 error_code = -EINVAL;
606 /* Convert ORANGEFS encoded errno values into regular errno values. */
607 } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
608 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
609 if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping))
610 error_code = -PINT_errno_mapping[i];
612 error_code = -EINVAL;
615 * Only ORANGEFS protocol error codes should ever come here. Otherwise
616 * there is a bug somewhere.
619 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
625 __s32 ORANGEFS_util_translate_mode(int mode)
629 static int modes[NUM_MODES] = {
630 S_IXOTH, S_IWOTH, S_IROTH,
631 S_IXGRP, S_IWGRP, S_IRGRP,
632 S_IXUSR, S_IWUSR, S_IRUSR,
635 static int orangefs_modes[NUM_MODES] = {
636 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
637 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
638 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
639 ORANGEFS_G_SGID, ORANGEFS_U_SUID
642 for (i = 0; i < NUM_MODES; i++)
644 ret |= orangefs_modes[i];