2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
5 This program can be distributed under the terms of the GNU GPL.
12 #include <linux/fuse.h>
14 #include <linux/mount.h>
15 #include <linux/wait.h>
16 #include <linux/list.h>
17 #include <linux/spinlock.h>
19 #include <linux/backing-dev.h>
20 #include <linux/mutex.h>
21 #include <linux/rwsem.h>
22 #include <linux/rbtree.h>
23 #include <linux/poll.h>
24 #include <linux/workqueue.h>
25 #include <linux/kref.h>
26 #include <linux/xattr.h>
27 #include <linux/pid_namespace.h>
28 #include <linux/refcount.h>
30 /** Max number of pages that can be used in a single read request */
31 #define FUSE_MAX_PAGES_PER_REQ 32
33 /** Bias for fi->writectr, meaning new writepages must not be sent */
34 #define FUSE_NOWRITE INT_MIN
36 /** It could be as large as PATH_MAX, but would that have any uses? */
37 #define FUSE_NAME_MAX 1024
39 /** Number of dentries for each connection in the control filesystem */
40 #define FUSE_CTL_NUM_DENTRIES 5
42 /** Number of page pointers embedded in fuse_req */
43 #define FUSE_REQ_INLINE_PAGES 1
45 /** List of active connections */
46 extern struct list_head fuse_conn_list;
48 /** Global mutex protecting fuse_conn_list and the control filesystem */
49 extern struct mutex fuse_mutex;
51 /** Module parameters */
52 extern unsigned max_user_bgreq;
53 extern unsigned max_user_congthresh;
55 /* One forget request */
56 struct fuse_forget_link {
57 struct fuse_forget_one forget_one;
58 struct fuse_forget_link *next;
66 /** Unique ID, which identifies the inode between userspace
70 /** Number of lookups on this inode */
73 /** The request used for sending the FORGET message */
74 struct fuse_forget_link *forget;
76 /** Time in jiffies until the file attributes are valid */
79 /** The sticky bit in inode->i_mode may have been removed, so
80 preserve the original mode */
83 /** 64 bit inode number */
86 /** Version of last attribute change */
89 /** Files usable in writepage. Protected by fc->lock */
90 struct list_head write_files;
92 /** Writepages pending on truncate or fsync */
93 struct list_head queued_writes;
95 /** Number of sent writes, a negative bias (FUSE_NOWRITE)
96 * means more writes are blocked */
99 /** Waitq for writepage completion */
100 wait_queue_head_t page_waitq;
102 /** List of writepage requests (pending or sent) */
103 struct list_head writepages;
105 /** Miscellaneous bits describing inode state */
108 /** Lock for serializing lookup and readdir for backward compatibility */
112 /** FUSE inode state bits */
114 /** Advise readdirplus */
115 FUSE_I_ADVISE_RDPLUS,
116 /** Initialized with readdirplus */
118 /** An operation changing file size is in progress */
119 FUSE_I_SIZE_UNSTABLE,
126 /** FUSE specific file data */
128 /** Fuse connection for this file */
129 struct fuse_conn *fc;
131 /** Request reserved for flush and release */
132 struct fuse_req *reserved_req;
134 /** Kernel file handle guaranteed to be unique */
137 /** File handle used by userspace */
140 /** Node id of this file */
146 /** FOPEN_* flags returned by open */
149 /** Entry on inode's write_files list */
150 struct list_head write_entry;
152 /** RB node to be linked on fuse_conn->polled_files */
153 struct rb_node polled_node;
155 /** Wait queue head for poll */
156 wait_queue_head_t poll_wait;
158 /** Has flock been performed on this file? */
162 /** One input argument of a request */
168 /** The request input */
170 /** The request header */
171 struct fuse_in_header h;
173 /** True if the data for the last argument is in req->pages */
176 /** Number of arguments */
179 /** Array of arguments */
180 struct fuse_in_arg args[3];
183 /** One output argument of a request */
189 /** The request output */
191 /** Header returned from userspace */
192 struct fuse_out_header h;
195 * The following bitfields are not changed during the request
199 /** Last argument is variable length (can be shorter than
203 /** Last argument is a list of pages to copy data to */
206 /** Zero partially or not copied pages */
207 unsigned page_zeroing:1;
209 /** Pages may be replaced with new ones */
210 unsigned page_replace:1;
212 /** Number of arguments */
215 /** Array of arguments */
216 struct fuse_arg args[2];
219 /** FUSE page descriptor */
220 struct fuse_page_desc {
232 struct fuse_in_arg args[3];
238 struct fuse_arg args[2];
242 #define FUSE_ARGS(args) struct fuse_args args = {}
244 /** The request IO state (for asynchronous processing) */
245 struct fuse_io_priv {
257 struct completion *done;
261 #define FUSE_IO_PRIV_SYNC(i) \
263 .refcnt = KREF_INIT(1), \
271 * FR_ISREPLY: set if the request has reply
272 * FR_FORCE: force sending of the request even if interrupted
273 * FR_BACKGROUND: request is sent in the background
274 * FR_WAITING: request is counted as "waiting"
275 * FR_ABORTED: the request was aborted
276 * FR_INTERRUPTED: the request has been interrupted
277 * FR_LOCKED: data is being copied to/from the request
278 * FR_PENDING: request is not yet in userspace
279 * FR_SENT: request is in userspace, waiting for an answer
280 * FR_FINISHED: request is finished
281 * FR_PRIVATE: request is on private list
298 * A request to the client
300 * .waitq.lock protects the following fields:
302 * - FR_LOCKED (may also be modified under fc->lock, tested under both)
305 /** This can be on either pending processing or io lists in
307 struct list_head list;
309 /** Entry on the interrupts list */
310 struct list_head intr_entry;
317 /** Unique ID for the interrupt request */
320 /* Request flags, updated with test/set/clear_bit() */
323 /** The request input */
326 /** The request output */
329 /** Used to wake up the task waiting for completion of the request */
330 wait_queue_head_t waitq;
332 /** Data for asynchronous requests */
335 struct fuse_release_in in;
338 struct fuse_init_in init_in;
339 struct fuse_init_out init_out;
340 struct cuse_init_in cuse_init_in;
342 struct fuse_read_in in;
346 struct fuse_write_in in;
347 struct fuse_write_out out;
348 struct fuse_req *next;
350 struct fuse_notify_retrieve_in retrieve_in;
356 /** page-descriptor vector */
357 struct fuse_page_desc *page_descs;
359 /** size of the 'pages' array */
362 /** inline page vector */
363 struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
365 /** inline page-descriptor vector */
366 struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
368 /** number of pages in vector */
371 /** File used in the request (or NULL) */
372 struct fuse_file *ff;
374 /** Inode used in the request or NULL */
377 /** AIO control block */
378 struct fuse_io_priv *io;
380 /** Link on fi->writepages */
381 struct list_head writepages_entry;
383 /** Request completion callback */
384 void (*end)(struct fuse_conn *, struct fuse_req *);
386 /** Request is stolen from fuse_file->reserved_req */
387 struct file *stolen_file;
391 /** Connection established */
394 /** Readers of the connection are waiting on this */
395 wait_queue_head_t waitq;
397 /** The next unique request id */
400 /** The list of pending requests */
401 struct list_head pending;
403 /** Pending interrupts */
404 struct list_head interrupts;
406 /** Queue of pending forgets */
407 struct fuse_forget_link forget_list_head;
408 struct fuse_forget_link *forget_list_tail;
410 /** Batching of FORGET requests (positive indicates FORGET batch) */
413 /** O_ASYNC requests */
414 struct fasync_struct *fasync;
418 /** Connection established */
421 /** Lock protecting accesses to members of this structure */
424 /** The list of requests being processed */
425 struct list_head processing;
427 /** The list of requests under I/O */
432 * Fuse device instance
435 /** Fuse connection for this device */
436 struct fuse_conn *fc;
438 /** Processing queue */
439 struct fuse_pqueue pq;
441 /** list entry on fc->devices */
442 struct list_head entry;
448 * This structure is created, when the filesystem is mounted, and is
449 * destroyed, when the client device is closed and the filesystem is
453 /** Lock protecting accesses to members of this structure */
459 /** Number of fuse_dev's */
464 /** The user id for this mount */
467 /** The group id for this mount */
470 /** The pid namespace for this mount */
471 struct pid_namespace *pid_ns;
473 /** Maximum read size */
476 /** Maximum write size */
480 struct fuse_iqueue iq;
482 /** The next unique kernel file handle */
485 /** rbtree of fuse_files waiting for poll events indexed by ph */
486 struct rb_root polled_files;
488 /** Maximum number of outstanding background requests */
489 unsigned max_background;
491 /** Number of background requests at which congestion starts */
492 unsigned congestion_threshold;
494 /** Number of requests currently in the background */
495 unsigned num_background;
497 /** Number of background requests currently queued for userspace */
498 unsigned active_background;
500 /** The list of background requests set aside for later queuing */
501 struct list_head bg_queue;
503 /** Flag indicating that INIT reply has been received. Allocating
504 * any fuse request will be suspended until the flag is set */
507 /** Flag indicating if connection is blocked. This will be
508 the case before the INIT reply is received, and if there
509 are too many outstanding background requests */
512 /** waitq for blocked connection */
513 wait_queue_head_t blocked_waitq;
515 /** waitq for reserved requests */
516 wait_queue_head_t reserved_req_waitq;
518 /** Connection established, cleared on umount, connection
519 abort and device release */
522 /** Connection failed (version mismatch). Cannot race with
523 setting other bitfields since it is only set once in INIT
524 reply, before any other request, and never cleared */
525 unsigned conn_error:1;
527 /** Connection successful. Only set in INIT */
528 unsigned conn_init:1;
530 /** Do readpages asynchronously? Only set in INIT */
531 unsigned async_read:1;
533 /** Do not send separate SETATTR request before open(O_TRUNC) */
534 unsigned atomic_o_trunc:1;
536 /** Filesystem supports NFS exporting. Only set in INIT */
537 unsigned export_support:1;
539 /** write-back cache policy (default is write-through) */
540 unsigned writeback_cache:1;
542 /** allow parallel lookups and readdir (default is serialized) */
543 unsigned parallel_dirops:1;
545 /** fs handles killing suid/sgid/cap on write/chown/trunc */
546 unsigned handle_killpriv:1;
549 * The following bitfields are only for optimization purposes
550 * and hence races in setting them will not cause malfunction
553 /** Is open/release not implemented by fs? */
556 /** Is fsync not implemented by fs? */
559 /** Is fsyncdir not implemented by fs? */
560 unsigned no_fsyncdir:1;
562 /** Is flush not implemented by fs? */
565 /** Is setxattr not implemented by fs? */
566 unsigned no_setxattr:1;
568 /** Is getxattr not implemented by fs? */
569 unsigned no_getxattr:1;
571 /** Is listxattr not implemented by fs? */
572 unsigned no_listxattr:1;
574 /** Is removexattr not implemented by fs? */
575 unsigned no_removexattr:1;
577 /** Are posix file locking primitives not implemented by fs? */
580 /** Is access not implemented by fs? */
581 unsigned no_access:1;
583 /** Is create not implemented by fs? */
584 unsigned no_create:1;
586 /** Is interrupt not implemented by fs? */
587 unsigned no_interrupt:1;
589 /** Is bmap not implemented by fs? */
592 /** Is poll not implemented by fs? */
595 /** Do multi-page cached writes */
596 unsigned big_writes:1;
598 /** Don't apply umask to creation modes */
599 unsigned dont_mask:1;
601 /** Are BSD file locking primitives not implemented by fs? */
604 /** Is fallocate not implemented by fs? */
605 unsigned no_fallocate:1;
607 /** Is rename with flags not implemented by fs? */
608 unsigned no_rename2:1;
610 /** Use enhanced/automatic page cache invalidation. */
611 unsigned auto_inval_data:1;
613 /** Does the filesystem support readdirplus? */
614 unsigned do_readdirplus:1;
616 /** Does the filesystem want adaptive readdirplus? */
617 unsigned readdirplus_auto:1;
619 /** Does the filesystem support asynchronous direct-IO submission? */
620 unsigned async_dio:1;
622 /** Is lseek not implemented by fs? */
625 /** Does the filesystem support posix acls? */
626 unsigned posix_acl:1;
628 /** Check permissions based on the file mode or not? */
629 unsigned default_permissions:1;
631 /** Allow other than the mounter user to access the filesystem ? */
632 unsigned allow_other:1;
634 /** The number of requests waiting for completion */
635 atomic_t num_waiting;
637 /** Negotiated minor version */
640 /** Entry on the fuse_conn_list */
641 struct list_head entry;
643 /** Device ID from super block */
646 /** Dentries in the control filesystem */
647 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
649 /** number of dentries used in the above array */
652 /** Key for lock owner ID scrambling */
655 /** Reserved request for the DESTROY message */
656 struct fuse_req *destroy_req;
658 /** Version counter for attribute changes */
661 /** Called on final put */
662 void (*release)(struct fuse_conn *);
664 /** Super block for this connection. */
665 struct super_block *sb;
667 /** Read/write semaphore to hold when accessing sb. */
668 struct rw_semaphore killsb;
670 /** List of device instances belonging to this connection */
671 struct list_head devices;
674 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
676 return sb->s_fs_info;
679 static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
681 return get_fuse_conn_super(inode->i_sb);
684 static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
686 return container_of(inode, struct fuse_inode, inode);
689 static inline u64 get_node_id(struct inode *inode)
691 return get_fuse_inode(inode)->nodeid;
694 static inline void fuse_make_bad(struct inode *inode)
696 remove_inode_hash(inode);
697 set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
700 static inline bool fuse_is_bad(struct inode *inode)
702 return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
705 /** Device operations */
706 extern const struct file_operations fuse_dev_operations;
708 extern const struct dentry_operations fuse_dentry_operations;
709 extern const struct dentry_operations fuse_root_dentry_operations;
712 * Inode to nodeid comparison.
714 int fuse_inode_eq(struct inode *inode, void *_nodeidp);
717 * Get a filled in inode
719 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
720 int generation, struct fuse_attr *attr,
721 u64 attr_valid, u64 attr_version);
723 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
724 struct fuse_entry_out *outarg, struct inode **inode);
727 * Send FORGET command
729 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
730 u64 nodeid, u64 nlookup);
732 struct fuse_forget_link *fuse_alloc_forget(void);
734 /* Used by READDIRPLUS */
735 void fuse_force_forget(struct file *file, u64 nodeid);
738 * Initialize READ or READDIR request
740 void fuse_read_fill(struct fuse_req *req, struct file *file,
741 loff_t pos, size_t count, int opcode);
744 * Send OPEN or OPENDIR request
746 int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
748 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
749 void fuse_file_free(struct fuse_file *ff);
750 void fuse_finish_open(struct inode *inode, struct file *file);
752 void fuse_sync_release(struct fuse_file *ff, int flags);
755 * Send RELEASE or RELEASEDIR request
757 void fuse_release_common(struct file *file, bool isdir);
760 * Send FSYNC or FSYNCDIR request
762 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
763 int datasync, int isdir);
768 int fuse_notify_poll_wakeup(struct fuse_conn *fc,
769 struct fuse_notify_poll_wakeup_out *outarg);
772 * Initialize file operations on a regular file
774 void fuse_init_file_inode(struct inode *inode);
777 * Initialize inode operations on regular files and special files
779 void fuse_init_common(struct inode *inode);
782 * Initialize inode and file operations on a directory
784 void fuse_init_dir(struct inode *inode);
787 * Initialize inode operations on a symlink
789 void fuse_init_symlink(struct inode *inode);
792 * Change attributes of an inode
794 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
795 u64 attr_valid, u64 attr_version);
797 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
801 * Initialize the client device
803 int fuse_dev_init(void);
806 * Cleanup the client device
808 void fuse_dev_cleanup(void);
810 int fuse_ctl_init(void);
811 void __exit fuse_ctl_cleanup(void);
816 struct fuse_req *fuse_request_alloc(unsigned npages);
818 struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
823 void fuse_request_free(struct fuse_req *req);
826 * Get a request, may fail with -ENOMEM,
827 * caller should specify # elements in req->pages[] explicitly
829 struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
830 struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
834 * Increment reference count on request
836 void __fuse_get_request(struct fuse_req *req);
839 * Gets a request for a file operation, always succeeds
841 struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
845 * Decrement reference count of a request. If count goes to zero free
848 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
851 * Send a request (synchronous)
853 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
856 * Simple request sending that does request allocation and freeing
858 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
861 * Send a request in the background
863 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
865 void fuse_request_send_background_locked(struct fuse_conn *fc,
866 struct fuse_req *req);
868 /* Abort all requests */
869 void fuse_abort_conn(struct fuse_conn *fc);
870 void fuse_wait_aborted(struct fuse_conn *fc);
873 * Invalidate inode attributes
875 void fuse_invalidate_attr(struct inode *inode);
877 void fuse_invalidate_entry_cache(struct dentry *entry);
879 void fuse_invalidate_atime(struct inode *inode);
882 * Acquire reference to fuse_conn
884 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
887 * Initialize fuse_conn
889 void fuse_conn_init(struct fuse_conn *fc);
892 * Release reference to fuse_conn
894 void fuse_conn_put(struct fuse_conn *fc);
896 struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc);
897 void fuse_dev_free(struct fuse_dev *fud);
900 * Add connection to control filesystem
902 int fuse_ctl_add_conn(struct fuse_conn *fc);
905 * Remove connection from control filesystem
907 void fuse_ctl_remove_conn(struct fuse_conn *fc);
910 * Is file type valid?
912 int fuse_valid_type(int m);
914 bool fuse_invalid_attr(struct fuse_attr *attr);
917 * Is current process allowed to perform filesystem operation?
919 int fuse_allow_current_process(struct fuse_conn *fc);
921 u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
923 void fuse_update_ctime(struct inode *inode);
925 int fuse_update_attributes(struct inode *inode, struct file *file);
927 void fuse_flush_writepages(struct inode *inode);
929 void fuse_set_nowrite(struct inode *inode);
930 void fuse_release_nowrite(struct inode *inode);
932 u64 fuse_get_attr_version(struct fuse_conn *fc);
935 * File-system tells the kernel to invalidate cache for the given node id.
937 int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
938 loff_t offset, loff_t len);
941 * File-system tells the kernel to invalidate parent attributes and
942 * the dentry matching parent/name.
944 * If the child_nodeid is non-zero and:
945 * - matches the inode number for the dentry matching parent/name,
946 * - is not a mount point
947 * - is a file or an empty directory
948 * then the dentry is unhashed (d_delete()).
950 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
951 u64 child_nodeid, struct qstr *name);
953 int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
957 * fuse_direct_io() flags
960 /** If set, it is WRITE; otherwise - READ */
961 #define FUSE_DIO_WRITE (1 << 0)
963 /** CUSE passes fuse_direct_io() a file whose f_mapping->host is not from FUSE */
964 #define FUSE_DIO_CUSE (1 << 1)
966 ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
967 loff_t *ppos, int flags);
968 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
970 long fuse_ioctl_common(struct file *file, unsigned int cmd,
971 unsigned long arg, unsigned int flags);
972 unsigned fuse_file_poll(struct file *file, poll_table *wait);
973 int fuse_dev_release(struct inode *inode, struct file *file);
975 bool fuse_write_update_size(struct inode *inode, loff_t pos);
977 int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
978 int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
980 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
983 void fuse_set_initialized(struct fuse_conn *fc);
985 void fuse_unlock_inode(struct inode *inode, bool locked);
986 bool fuse_lock_inode(struct inode *inode);
988 int fuse_setxattr(struct inode *inode, const char *name, const void *value,
989 size_t size, int flags);
990 ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
992 ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
993 int fuse_removexattr(struct inode *inode, const char *name);
994 extern const struct xattr_handler *fuse_xattr_handlers[];
995 extern const struct xattr_handler *fuse_acl_xattr_handlers[];
998 struct posix_acl *fuse_get_acl(struct inode *inode, int type);
999 int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
1001 #endif /* _FS_FUSE_I_H */