2 * (C) 2001 Clemson University and The University of Chicago
4 * See COPYING in top-level directory.
7 #include "orangefs-kernel.h"
8 #include "orangefs-dev-proto.h"
9 #include "orangefs-bufmap.h"
11 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
13 __s32 fsid = ORANGEFS_FS_ID_NULL;
16 switch (op->upcall.type) {
17 case ORANGEFS_VFS_OP_FILE_IO:
18 fsid = op->upcall.req.io.refn.fs_id;
20 case ORANGEFS_VFS_OP_LOOKUP:
21 fsid = op->upcall.req.lookup.parent_refn.fs_id;
23 case ORANGEFS_VFS_OP_CREATE:
24 fsid = op->upcall.req.create.parent_refn.fs_id;
26 case ORANGEFS_VFS_OP_GETATTR:
27 fsid = op->upcall.req.getattr.refn.fs_id;
29 case ORANGEFS_VFS_OP_REMOVE:
30 fsid = op->upcall.req.remove.parent_refn.fs_id;
32 case ORANGEFS_VFS_OP_MKDIR:
33 fsid = op->upcall.req.mkdir.parent_refn.fs_id;
35 case ORANGEFS_VFS_OP_READDIR:
36 fsid = op->upcall.req.readdir.refn.fs_id;
38 case ORANGEFS_VFS_OP_SETATTR:
39 fsid = op->upcall.req.setattr.refn.fs_id;
41 case ORANGEFS_VFS_OP_SYMLINK:
42 fsid = op->upcall.req.sym.parent_refn.fs_id;
44 case ORANGEFS_VFS_OP_RENAME:
45 fsid = op->upcall.req.rename.old_parent_refn.fs_id;
47 case ORANGEFS_VFS_OP_STATFS:
48 fsid = op->upcall.req.statfs.fs_id;
50 case ORANGEFS_VFS_OP_TRUNCATE:
51 fsid = op->upcall.req.truncate.refn.fs_id;
53 case ORANGEFS_VFS_OP_RA_FLUSH:
54 fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
56 case ORANGEFS_VFS_OP_FS_UMOUNT:
57 fsid = op->upcall.req.fs_umount.fs_id;
59 case ORANGEFS_VFS_OP_GETXATTR:
60 fsid = op->upcall.req.getxattr.refn.fs_id;
62 case ORANGEFS_VFS_OP_SETXATTR:
63 fsid = op->upcall.req.setxattr.refn.fs_id;
65 case ORANGEFS_VFS_OP_LISTXATTR:
66 fsid = op->upcall.req.listxattr.refn.fs_id;
68 case ORANGEFS_VFS_OP_REMOVEXATTR:
69 fsid = op->upcall.req.removexattr.refn.fs_id;
71 case ORANGEFS_VFS_OP_FSYNC:
72 fsid = op->upcall.req.fsync.refn.fs_id;
81 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
84 if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
87 flags &= ~S_IMMUTABLE;
88 if (attrs->flags & ORANGEFS_APPEND_FL)
92 if (attrs->flags & ORANGEFS_NOATIME_FL)
99 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
103 if (attrs->perms & ORANGEFS_O_EXECUTE)
104 perm_mode |= S_IXOTH;
105 if (attrs->perms & ORANGEFS_O_WRITE)
106 perm_mode |= S_IWOTH;
107 if (attrs->perms & ORANGEFS_O_READ)
108 perm_mode |= S_IROTH;
110 if (attrs->perms & ORANGEFS_G_EXECUTE)
111 perm_mode |= S_IXGRP;
112 if (attrs->perms & ORANGEFS_G_WRITE)
113 perm_mode |= S_IWGRP;
114 if (attrs->perms & ORANGEFS_G_READ)
115 perm_mode |= S_IRGRP;
117 if (attrs->perms & ORANGEFS_U_EXECUTE)
118 perm_mode |= S_IXUSR;
119 if (attrs->perms & ORANGEFS_U_WRITE)
120 perm_mode |= S_IWUSR;
121 if (attrs->perms & ORANGEFS_U_READ)
122 perm_mode |= S_IRUSR;
124 if (attrs->perms & ORANGEFS_G_SGID)
125 perm_mode |= S_ISGID;
126 if (attrs->perms & ORANGEFS_U_SUID)
127 perm_mode |= S_ISUID;
133 * NOTE: in kernel land, we never use the sys_attr->link_target for
134 * anything, so don't bother copying it into the sys_attr object here.
136 static inline int copy_attributes_from_inode(struct inode *inode,
137 struct ORANGEFS_sys_attr_s *attrs,
142 if (!iattr || !inode || !attrs) {
143 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
144 "in copy_attributes_from_inode!\n",
151 * We need to be careful to only copy the attributes out of the
152 * iattr object that we know are valid.
155 if (iattr->ia_valid & ATTR_UID) {
156 attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
157 attrs->mask |= ORANGEFS_ATTR_SYS_UID;
158 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
160 if (iattr->ia_valid & ATTR_GID) {
161 attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
162 attrs->mask |= ORANGEFS_ATTR_SYS_GID;
163 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
166 if (iattr->ia_valid & ATTR_ATIME) {
167 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
168 if (iattr->ia_valid & ATTR_ATIME_SET) {
169 attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
170 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
173 if (iattr->ia_valid & ATTR_MTIME) {
174 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
175 if (iattr->ia_valid & ATTR_MTIME_SET) {
176 attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
177 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
180 if (iattr->ia_valid & ATTR_CTIME)
181 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
184 * ORANGEFS cannot set size with a setattr operation. Probably not likely
185 * to be requested through the VFS, but just in case, don't worry about
189 if (iattr->ia_valid & ATTR_MODE) {
190 tmp_mode = iattr->ia_mode;
191 if (tmp_mode & (S_ISVTX)) {
192 if (is_root_handle(inode)) {
194 * allow sticky bit to be set on root (since
195 * it shows up that way by default anyhow),
196 * but don't show it to the server
200 gossip_debug(GOSSIP_UTILS_DEBUG,
201 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
206 if (tmp_mode & (S_ISUID)) {
207 gossip_debug(GOSSIP_UTILS_DEBUG,
208 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
212 attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
213 attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
219 static int orangefs_inode_type(enum orangefs_ds_type objtype)
221 if (objtype == ORANGEFS_TYPE_METAFILE)
223 else if (objtype == ORANGEFS_TYPE_DIRECTORY)
225 else if (objtype == ORANGEFS_TYPE_SYMLINK)
231 static int orangefs_inode_is_stale(struct inode *inode, int new,
232 struct ORANGEFS_sys_attr_s *attrs, char *link_target)
234 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
235 int type = orangefs_inode_type(attrs->objtype);
238 * If the inode type or symlink target have changed then this
241 if (type == -1 || !(inode->i_mode & type)) {
242 orangefs_make_bad_inode(inode);
245 if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
246 link_target, ORANGEFS_NAME_MAX)) {
247 orangefs_make_bad_inode(inode);
254 int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
256 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
257 struct orangefs_kernel_op_s *new_op;
258 loff_t inode_size, rounded_up_size;
261 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
262 get_khandle_from_ino(inode));
264 if (!new && !bypass) {
265 if (time_before(jiffies, orangefs_inode->getattr_time))
269 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
272 new_op->upcall.req.getattr.refn = orangefs_inode->refn;
273 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
275 ret = service_operation(new_op, __func__,
276 get_interruptible_flag(inode));
280 type = orangefs_inode_type(new_op->
281 downcall.resp.getattr.attributes.objtype);
282 ret = orangefs_inode_is_stale(inode, new,
283 &new_op->downcall.resp.getattr.attributes,
284 new_op->downcall.resp.getattr.link_target);
292 inode->i_flags = orangefs_inode_flags(&new_op->
293 downcall.resp.getattr.attributes);
294 inode_size = (loff_t)new_op->
295 downcall.resp.getattr.attributes.size;
297 (inode_size + (4096 - (inode_size % 4096)));
298 inode->i_size = inode_size;
299 orangefs_inode->blksize =
300 new_op->downcall.resp.getattr.attributes.blksize;
301 spin_lock(&inode->i_lock);
302 inode->i_bytes = inode_size;
304 (unsigned long)(rounded_up_size / 512);
305 spin_unlock(&inode->i_lock);
308 inode->i_size = PAGE_SIZE;
309 orangefs_inode->blksize = i_blocksize(inode);
310 spin_lock(&inode->i_lock);
311 inode_set_bytes(inode, inode->i_size);
312 spin_unlock(&inode->i_lock);
317 inode->i_size = (loff_t)strlen(new_op->
318 downcall.resp.getattr.link_target);
319 orangefs_inode->blksize = i_blocksize(inode);
320 ret = strscpy(orangefs_inode->link_target,
321 new_op->downcall.resp.getattr.link_target,
327 inode->i_link = orangefs_inode->link_target;
332 inode->i_uid = make_kuid(&init_user_ns, new_op->
333 downcall.resp.getattr.attributes.owner);
334 inode->i_gid = make_kgid(&init_user_ns, new_op->
335 downcall.resp.getattr.attributes.group);
336 inode->i_atime.tv_sec = (time64_t)new_op->
337 downcall.resp.getattr.attributes.atime;
338 inode->i_mtime.tv_sec = (time64_t)new_op->
339 downcall.resp.getattr.attributes.mtime;
340 inode->i_ctime.tv_sec = (time64_t)new_op->
341 downcall.resp.getattr.attributes.ctime;
342 inode->i_atime.tv_nsec = 0;
343 inode->i_mtime.tv_nsec = 0;
344 inode->i_ctime.tv_nsec = 0;
346 /* special case: mark the root inode as sticky */
347 inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
348 orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
350 orangefs_inode->getattr_time = jiffies +
351 orangefs_getattr_timeout_msecs*HZ/1000;
358 int orangefs_inode_check_changed(struct inode *inode)
360 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
361 struct orangefs_kernel_op_s *new_op;
364 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
365 get_khandle_from_ino(inode));
367 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
370 new_op->upcall.req.getattr.refn = orangefs_inode->refn;
371 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
372 ORANGEFS_ATTR_SYS_LNK_TARGET;
374 ret = service_operation(new_op, __func__,
375 get_interruptible_flag(inode));
379 ret = orangefs_inode_is_stale(inode, 0,
380 &new_op->downcall.resp.getattr.attributes,
381 new_op->downcall.resp.getattr.link_target);
388 * issues a orangefs setattr request to make sure the new attribute values
389 * take effect if successful. returns 0 on success; -errno otherwise
391 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
393 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
394 struct orangefs_kernel_op_s *new_op;
397 new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
401 new_op->upcall.req.setattr.refn = orangefs_inode->refn;
402 ret = copy_attributes_from_inode(inode,
403 &new_op->upcall.req.setattr.attributes,
406 ret = service_operation(new_op, __func__,
407 get_interruptible_flag(inode));
409 gossip_debug(GOSSIP_UTILS_DEBUG,
410 "orangefs_inode_setattr: returning %d\n",
417 * successful setattr should clear the atime, mtime and
421 ClearAtimeFlag(orangefs_inode);
422 ClearMtimeFlag(orangefs_inode);
423 ClearCtimeFlag(orangefs_inode);
424 ClearModeFlag(orangefs_inode);
425 orangefs_inode->getattr_time = jiffies - 1;
431 int orangefs_flush_inode(struct inode *inode)
434 * If it is a dirty inode, this function gets called.
435 * Gather all the information that needs to be setattr'ed
436 * Right now, this will only be used for mode, atime, mtime
445 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
447 memset(&wbattr, 0, sizeof(wbattr));
450 * check inode flags up front, and clear them if they are set. This
451 * will prevent multiple processes from all trying to flush the same
452 * inode if they call close() simultaneously
454 mtime_flag = MtimeFlag(orangefs_inode);
455 ClearMtimeFlag(orangefs_inode);
456 ctime_flag = CtimeFlag(orangefs_inode);
457 ClearCtimeFlag(orangefs_inode);
458 atime_flag = AtimeFlag(orangefs_inode);
459 ClearAtimeFlag(orangefs_inode);
460 mode_flag = ModeFlag(orangefs_inode);
461 ClearModeFlag(orangefs_inode);
463 /* -- Lazy atime,mtime and ctime update --
464 * Note: all times are dictated by server in the new scheme
465 * and not by the clients
467 * Also mode updates are being handled now..
471 wbattr.ia_valid |= ATTR_MTIME;
473 wbattr.ia_valid |= ATTR_CTIME;
475 wbattr.ia_valid |= ATTR_ATIME;
478 wbattr.ia_mode = inode->i_mode;
479 wbattr.ia_valid |= ATTR_MODE;
482 gossip_debug(GOSSIP_UTILS_DEBUG,
483 "*********** orangefs_flush_inode: %pU "
485 get_khandle_from_ino(inode),
487 if (wbattr.ia_valid == 0) {
488 gossip_debug(GOSSIP_UTILS_DEBUG,
489 "orangefs_flush_inode skipping setattr()\n");
493 gossip_debug(GOSSIP_UTILS_DEBUG,
494 "orangefs_flush_inode (%pU) writing mode %o\n",
495 get_khandle_from_ino(inode),
498 ret = orangefs_inode_setattr(inode, &wbattr);
503 int orangefs_unmount_sb(struct super_block *sb)
506 struct orangefs_kernel_op_s *new_op = NULL;
508 gossip_debug(GOSSIP_UTILS_DEBUG,
509 "orangefs_unmount_sb called on sb %p\n",
512 new_op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT);
515 new_op->upcall.req.fs_umount.id = ORANGEFS_SB(sb)->id;
516 new_op->upcall.req.fs_umount.fs_id = ORANGEFS_SB(sb)->fs_id;
517 strncpy(new_op->upcall.req.fs_umount.orangefs_config_server,
518 ORANGEFS_SB(sb)->devname,
519 ORANGEFS_MAX_SERVER_ADDR_LEN);
521 gossip_debug(GOSSIP_UTILS_DEBUG,
522 "Attempting ORANGEFS Unmount via host %s\n",
523 new_op->upcall.req.fs_umount.orangefs_config_server);
525 ret = service_operation(new_op, "orangefs_fs_umount", 0);
527 gossip_debug(GOSSIP_UTILS_DEBUG,
528 "orangefs_unmount: got return value of %d\n", ret);
532 ORANGEFS_SB(sb)->mount_pending = 1;
538 void orangefs_make_bad_inode(struct inode *inode)
540 if (is_root_handle(inode)) {
542 * if this occurs, the pvfs2-client-core was killed but we
543 * can't afford to lose the inode operations and such
544 * associated with the root handle in any case.
546 gossip_debug(GOSSIP_UTILS_DEBUG,
547 "*** NOT making bad root inode %pU\n",
548 get_khandle_from_ino(inode));
550 gossip_debug(GOSSIP_UTILS_DEBUG,
551 "*** making bad inode %pU\n",
552 get_khandle_from_ino(inode));
553 make_bad_inode(inode);
/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 */

/*
 * Lookup table from an ORANGEFS error number (the array index) to the
 * matching errno value.  It is only read, hence const.
 *
 * The order matches include/orangefs-types.h in the OrangeFS source.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
576 int orangefs_normalize_to_errno(__s32 error_code)
581 if (error_code == 0) {
584 * This shouldn't ever happen. If it does it should be fixed on the
587 } else if (error_code > 0) {
588 gossip_err("orangefs: error status receieved.\n");
589 gossip_err("orangefs: assuming error code is inverted.\n");
590 error_code = -error_code;
594 * XXX: This is very bad since error codes from ORANGEFS may not be
595 * suitable for return into userspace.
599 * Convert ORANGEFS error values into errno values suitable for return
602 if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
604 (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
605 ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
607 * cancellation error codes generally correspond to
608 * a timeout from the client's perspective
610 error_code = -ETIMEDOUT;
612 /* assume a default error code */
613 gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
614 error_code = -EINVAL;
617 /* Convert ORANGEFS encoded errno values into regular errno values. */
618 } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
619 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
620 if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping))
621 error_code = -PINT_errno_mapping[i];
623 error_code = -EINVAL;
626 * Only ORANGEFS protocol error codes should ever come here. Otherwise
627 * there is a bug somewhere.
630 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
636 __s32 ORANGEFS_util_translate_mode(int mode)
640 static int modes[NUM_MODES] = {
641 S_IXOTH, S_IWOTH, S_IROTH,
642 S_IXGRP, S_IWGRP, S_IRGRP,
643 S_IXUSR, S_IWUSR, S_IRUSR,
646 static int orangefs_modes[NUM_MODES] = {
647 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
648 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
649 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
650 ORANGEFS_G_SGID, ORANGEFS_U_SUID
653 for (i = 0; i < NUM_MODES; i++)
655 ret |= orangefs_modes[i];