4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Map POSIX open() access-mode bits to the NT/CIFS desired-access mask
 * used for an NT-style open request.
 * NOTE(review): this extract is missing lines (the O_RDONLY/O_WRONLY
 * return statements and closing braces) - confirm against full source.
 */
47 static inline int cifs_convert_flags(unsigned int flags)
49 if ((flags & O_ACCMODE) == O_RDONLY)
51 else if ((flags & O_ACCMODE) == O_WRONLY)
53 else if ((flags & O_ACCMODE) == O_RDWR) {
54 /* GENERIC_ALL is too much permission to request
55 can cause unnecessary access denied on create */
56 /* return GENERIC_ALL; */
57 return (GENERIC_READ | GENERIC_WRITE);
/* fallback: no access mode matched - request a conservative superset */
60 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate VFS open flags into the SMB_O_* flag set used by the CIFS
 * POSIX-extensions open call (cifs_posix_open).
 */
65 static u32 cifs_posix_convert_flags(unsigned int flags)
69 if ((flags & O_ACCMODE) == O_RDONLY)
70 posix_flags = SMB_O_RDONLY;
71 else if ((flags & O_ACCMODE) == O_WRONLY)
72 posix_flags = SMB_O_WRONLY;
73 else if ((flags & O_ACCMODE) == O_RDWR)
74 posix_flags = SMB_O_RDWR;
76 if (flags & O_CREAT) {
77 posix_flags |= SMB_O_CREAT;
/* O_EXCL is honored only in combination with O_CREAT, as POSIX requires */
79 posix_flags |= SMB_O_EXCL;
80 } else if (flags & O_EXCL)
81 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82 current->comm, current->tgid);
85 posix_flags |= SMB_O_TRUNC;
86 /* be safe and imply O_SYNC for O_DSYNC */
88 posix_flags |= SMB_O_SYNC;
89 if (flags & O_DIRECTORY)
90 posix_flags |= SMB_O_DIRECTORY;
91 if (flags & O_NOFOLLOW)
92 posix_flags |= SMB_O_NOFOLLOW;
/* presumably guarded by an (flags & O_DIRECT) test lost in this extract */
94 posix_flags |= SMB_O_DIRECT;
/*
 * Map POSIX create/truncate flag combinations to the CIFS create
 * disposition (see the mapping table documented in cifs_nt_open).
 * NOTE(review): the FILE_CREATE / FILE_OPEN_IF / default FILE_OPEN
 * returns are missing from this extract.
 */
99 static inline int cifs_get_disposition(unsigned int flags)
101 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104 return FILE_OVERWRITE_IF;
105 else if ((flags & O_CREAT) == O_CREAT)
107 else if ((flags & O_TRUNC) == O_TRUNC)
108 return FILE_OVERWRITE;
/*
 * Open a file using the CIFS POSIX extensions (CIFSPOSIXCreate).
 * On success fills *poplock / *pnetfid and, when *pinode is NULL,
 * instantiates a new inode from the returned FILE_UNIX_BASIC_INFO;
 * otherwise refreshes the existing inode's attributes.
 */
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct super_block *sb, int mode, unsigned int f_flags,
115 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
118 FILE_UNIX_BASIC_INFO *presp_data;
119 __u32 posix_flags = 0;
120 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121 struct cifs_fattr fattr;
122 struct tcon_link *tlink;
123 struct cifs_tcon *tcon;
125 cifs_dbg(FYI, "posix open %s\n", full_path);
127 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128 if (presp_data == NULL)
131 tlink = cifs_sb_tlink(cifs_sb);
137 tcon = tlink_tcon(tlink);
/* apply the caller's umask before sending the create mode */
138 mode &= ~current_umask();
140 posix_flags = cifs_posix_convert_flags(f_flags);
141 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142 poplock, full_path, cifs_sb->local_nls,
143 cifs_remap(cifs_sb));
144 cifs_put_tlink(tlink);
/* Type == -1 means the server returned no file metadata */
149 if (presp_data->Type == cpu_to_le32(-1))
150 goto posix_open_ret; /* open ok, caller does qpathinfo */
153 goto posix_open_ret; /* caller does not need info */
155 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157 /* get new inode and set it up */
158 if (*pinode == NULL) {
159 cifs_fill_uniqueid(sb, &fattr);
160 *pinode = cifs_iget(sb, &fattr);
/* existing inode: revalidate the page cache, then update attributes */
166 cifs_revalidate_mapping(*pinode);
167 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open a file the non-POSIX (NT) way via server->ops->open, converting
 * the POSIX open flags to an NT desired-access mask, create disposition
 * and create options.  On success refreshes inode metadata from the
 * returned FILE_ALL_INFO (or via unix path info when unix_ext is on).
 */
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
183 int create_options = CREATE_NOT_DIR;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
/* bail out early if this server dialect has no open operation */
188 if (!server->ops->open)
191 desired_access = cifs_convert_flags(f_flags);
193 /*********************************************************************
194 * open flag mapping table:
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
204 * Note that there is not a direct match between disposition
205 * FILE_SUPERSEDE (ie create whether or not file exists although
206 * O_CREAT | O_TRUNC is similar but truncates the existing
207 * file rather than creating a new file as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on open call)
210 *? O_SYNC is a reasonable match to CIFS writethrough flag
211 *? and the read write flags match reasonably. O_LARGEFILE
212 *? is irrelevant because largefile support is always used
213 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
217 disposition = cifs_get_disposition(f_flags);
219 /* BB pass O_SYNC flag through on file attributes .. BB */
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
228 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
229 if (f_flags & O_SYNC)
230 create_options |= CREATE_WRITE_THROUGH;
232 if (f_flags & O_DIRECT)
233 create_options |= CREATE_NO_BUFFER;
236 oparms.cifs_sb = cifs_sb;
237 oparms.desired_access = desired_access;
238 oparms.create_options = create_options;
239 oparms.disposition = disposition;
240 oparms.path = full_path;
242 oparms.reconnect = false;
244 rc = server->ops->open(xid, &oparms, oplock, buf);
/* refresh cached inode info; unix vs NT path chosen by unix_ext
   (the guarding if/else lines are missing from this extract) */
250 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
253 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/* error path: close the just-opened handle so it does not leak */
257 server->ops->close(xid, tcon, fid);
/*
 * Return true if any open fid on this inode holds mandatory byte-range
 * locks.  Walks cinode->llist under lock_sem held for read.
 */
268 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
270 struct cifs_fid_locks *cur;
271 bool has_locks = false;
273 down_read(&cinode->lock_sem);
274 list_for_each_entry(cur, &cinode->llist, llist) {
275 if (!list_empty(&cur->locks)) {
280 up_read(&cinode->lock_sem);
/*
 * Acquire a rw_semaphore for write by polling with down_write_trylock
 * in a loop (the sleep between attempts is missing from this extract).
 * NOTE(review): spin-style acquisition - presumably chosen to avoid a
 * lock-ordering issue with plain down_write; confirm against full source.
 */
285 cifs_down_write(struct rw_semaphore *sem)
287 while (!down_write_trylock(sem))
/*
 * Allocate and initialize a cifsFileInfo for a freshly opened handle,
 * link it onto the tcon and inode open-file lists, resolve any pending
 * oplock state, and attach it to file->private_data.
 */
291 struct cifsFileInfo *
292 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
293 struct tcon_link *tlink, __u32 oplock)
295 struct dentry *dentry = file->f_path.dentry;
296 struct inode *inode = d_inode(dentry);
297 struct cifsInodeInfo *cinode = CIFS_I(inode);
298 struct cifsFileInfo *cfile;
299 struct cifs_fid_locks *fdlocks;
300 struct cifs_tcon *tcon = tlink_tcon(tlink);
301 struct TCP_Server_Info *server = tcon->ses->server;
303 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
307 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
/* per-fid lock list, cross-linked with the new cfile */
313 INIT_LIST_HEAD(&fdlocks->locks);
314 fdlocks->cfile = cfile;
315 cfile->llist = fdlocks;
318 cfile->pid = current->tgid;
319 cfile->uid = current_fsuid();
/* hold dentry, tlink and superblock references for the handle's lifetime */
320 cfile->dentry = dget(dentry);
321 cfile->f_flags = file->f_flags;
322 cfile->invalidHandle = false;
323 cfile->tlink = cifs_get_tlink(tlink);
324 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
325 mutex_init(&cfile->fh_mutex);
326 spin_lock_init(&cfile->file_info_lock);
328 cifs_sb_active(inode->i_sb);
331 * If the server returned a read oplock and we have mandatory brlocks,
332 * set oplock level to None.
334 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
335 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
339 cifs_down_write(&cinode->lock_sem);
340 list_add(&fdlocks->llist, &cinode->llist);
341 up_write(&cinode->lock_sem);
343 spin_lock(&tcon->open_file_lock);
/* a lease break may have updated the oplock while the open was pending */
344 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
345 oplock = fid->pending_open->oplock;
346 list_del(&fid->pending_open->olist);
348 fid->purge_cache = false;
349 server->ops->set_fid(cfile, fid, oplock);
351 list_add(&cfile->tlist, &tcon->openFileList);
353 /* if readable file instance put first in list*/
354 if (file->f_mode & FMODE_READ)
355 list_add(&cfile->flist, &cinode->openFileList);
357 list_add_tail(&cfile->flist, &cinode->openFileList);
358 spin_unlock(&tcon->open_file_lock);
360 if (fid->purge_cache)
361 cifs_zap_mapping(inode);
363 file->private_data = cfile;
/*
 * Take a reference on a cifsFileInfo under its file_info_lock and
 * return it (the return statement is missing from this extract).
 */
367 struct cifsFileInfo *
368 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
370 spin_lock(&cifs_file->file_info_lock);
371 cifsFileInfo_get_locked(cifs_file);
372 spin_unlock(&cifs_file->file_info_lock);
377 * Release a reference on the file private data. This may involve closing
378 * the filehandle out on the server. Must be called without holding
379 * tcon->open_file_lock and cifs_file->file_info_lock.
381 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
383 struct inode *inode = d_inode(cifs_file->dentry);
384 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
385 struct TCP_Server_Info *server = tcon->ses->server;
386 struct cifsInodeInfo *cifsi = CIFS_I(inode);
387 struct super_block *sb = inode->i_sb;
388 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
389 struct cifsLockInfo *li, *tmp;
391 struct cifs_pending_open open;
392 bool oplock_break_cancelled;
394 spin_lock(&tcon->open_file_lock);
/* fast path: drop one reference; only the last ref tears down the handle */
396 spin_lock(&cifs_file->file_info_lock);
397 if (--cifs_file->count > 0) {
398 spin_unlock(&cifs_file->file_info_lock);
399 spin_unlock(&tcon->open_file_lock);
402 spin_unlock(&cifs_file->file_info_lock);
404 if (server->ops->get_lease_key)
405 server->ops->get_lease_key(inode, &fid);
407 /* store open in pending opens to make sure we don't miss lease break */
408 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
410 /* remove it from the lists */
411 list_del(&cifs_file->flist);
412 list_del(&cifs_file->tlist);
414 if (list_empty(&cifsi->openFileList)) {
415 cifs_dbg(FYI, "closing last open instance for inode %p\n",
416 d_inode(cifs_file->dentry));
418 * In strict cache mode we need invalidate mapping on the last
419 * close because it may cause a error when we open this file
420 * again and get at least level II oplock.
422 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
423 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
424 cifs_set_oplock_level(cifsi, 0);
427 spin_unlock(&tcon->open_file_lock);
/* stop any in-flight oplock-break work before closing the handle */
429 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
431 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
432 struct TCP_Server_Info *server = tcon->ses->server;
436 if (server->ops->close)
437 server->ops->close(xid, tcon, &cifs_file->fid);
441 if (oplock_break_cancelled)
442 cifs_done_oplock_break(cifsi);
444 cifs_del_pending_open(&open);
447 * Delete any outstanding lock records. We'll lose them when the file
450 cifs_down_write(&cifsi->lock_sem);
451 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
452 list_del(&li->llist);
453 cifs_del_lock_waiters(li);
456 list_del(&cifs_file->llist->llist);
457 kfree(cifs_file->llist);
458 up_write(&cifsi->lock_sem);
/* drop the references taken in cifs_new_fileinfo */
460 cifs_put_tlink(cifs_file->tlink);
461 dput(cifs_file->dentry);
462 cifs_sb_deactive(sb);
/*
 * VFS ->open for regular files.  Tries the POSIX-extensions open first
 * when the server advertises CIFS_UNIX_POSIX_PATH_OPS_CAP, falling back
 * to cifs_nt_open on unsupported/transient errors, then builds the
 * cifsFileInfo via cifs_new_fileinfo.
 */
466 int cifs_open(struct inode *inode, struct file *file)
472 struct cifs_sb_info *cifs_sb;
473 struct TCP_Server_Info *server;
474 struct cifs_tcon *tcon;
475 struct tcon_link *tlink;
476 struct cifsFileInfo *cfile = NULL;
477 char *full_path = NULL;
478 bool posix_open_ok = false;
480 struct cifs_pending_open open;
484 cifs_sb = CIFS_SB(inode->i_sb);
485 tlink = cifs_sb_tlink(cifs_sb);
488 return PTR_ERR(tlink);
490 tcon = tlink_tcon(tlink);
491 server = tcon->ses->server;
493 full_path = build_path_from_dentry(file->f_path.dentry);
494 if (full_path == NULL) {
499 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
500 inode, file->f_flags, full_path);
/* strict-IO + O_DIRECT: switch to the direct (uncached) file ops */
502 if (file->f_flags & O_DIRECT &&
503 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
504 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
505 file->f_op = &cifs_file_direct_nobrl_ops;
507 file->f_op = &cifs_file_direct_ops;
515 if (!tcon->broken_posix_open && tcon->unix_ext &&
516 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
517 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
518 /* can not refresh inode info since size could be stale */
519 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
520 cifs_sb->mnt_file_mode /* ignored */,
521 file->f_flags, &oplock, &fid.netfid, xid);
523 cifs_dbg(FYI, "posix open succeeded\n");
524 posix_open_ok = true;
525 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
/* server claims the capability but rejects the call: disable it */
526 if (tcon->ses->serverNOS)
527 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
528 tcon->ses->serverName,
529 tcon->ses->serverNOS);
530 tcon->broken_posix_open = true;
531 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
532 (rc != -EOPNOTSUPP)) /* path not found or net err */
535 * Else fallthrough to retry open the old way on network i/o
540 if (server->ops->get_lease_key)
541 server->ops->get_lease_key(inode, &fid);
/* register a pending open so a lease break during open is not lost */
543 cifs_add_pending_open(&fid, tlink, &open);
545 if (!posix_open_ok) {
546 if (server->ops->get_lease_key)
547 server->ops->get_lease_key(inode, &fid);
549 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
550 file->f_flags, &oplock, &fid, xid);
552 cifs_del_pending_open(&open);
557 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
/* fileinfo allocation failed: close the server handle and unwind */
559 if (server->ops->close)
560 server->ops->close(xid, tcon, &fid);
561 cifs_del_pending_open(&open);
566 cifs_fscache_set_inode_cookie(inode, file);
568 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
570 * Time to set mode which we can not set earlier due to
571 * problems creating new read-only files.
573 struct cifs_unix_set_info_args args = {
574 .mode = inode->i_mode,
575 .uid = INVALID_UID, /* no change */
576 .gid = INVALID_GID, /* no change */
577 .ctime = NO_CHANGE_64,
578 .atime = NO_CHANGE_64,
579 .mtime = NO_CHANGE_64,
582 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
589 cifs_put_tlink(tlink);
593 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
596 * Try to reacquire byte range locks that were released when session
597 * to server was lost.
600 cifs_relock_file(struct cifsFileInfo *cfile)
602 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
603 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
604 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
/* nested annotation: lock_sem may already be held in the reconnect path */
607 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
608 if (cinode->can_cache_brlcks) {
609 /* can cache locks - no need to relock */
610 up_read(&cinode->lock_sem);
/* posix-style push when the server supports unix fcntl locks and the
   mount has not disabled posix brlocks; otherwise mandatory-style push */
614 if (cap_unix(tcon->ses) &&
615 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
616 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
617 rc = cifs_push_posix_locks(cfile);
619 rc = tcon->ses->server->ops->push_mand_locks(cfile);
621 up_read(&cinode->lock_sem);
/*
 * Re-open a file handle that was invalidated (e.g. after reconnect).
 * Serialized per-handle via fh_mutex.  @can_flush indicates it is safe
 * to flush dirty pages and refresh inode info after the reopen; when
 * reopening succeeds, byte-range locks are re-pushed via cifs_relock_file.
 */
626 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
631 struct cifs_sb_info *cifs_sb;
632 struct cifs_tcon *tcon;
633 struct TCP_Server_Info *server;
634 struct cifsInodeInfo *cinode;
636 char *full_path = NULL;
638 int disposition = FILE_OPEN;
639 int create_options = CREATE_NOT_DIR;
640 struct cifs_open_parms oparms;
643 mutex_lock(&cfile->fh_mutex);
/* another thread may have already reopened the handle */
644 if (!cfile->invalidHandle) {
645 mutex_unlock(&cfile->fh_mutex);
651 inode = d_inode(cfile->dentry);
652 cifs_sb = CIFS_SB(inode->i_sb);
653 tcon = tlink_tcon(cfile->tlink);
654 server = tcon->ses->server;
657 * Can not grab rename sem here because various ops, including those
658 * that already have the rename sem can end up causing writepage to get
659 * called and if the server was down that means we end up here, and we
660 * can never tell if the caller already has the rename_sem.
662 full_path = build_path_from_dentry(cfile->dentry);
663 if (full_path == NULL) {
665 mutex_unlock(&cfile->fh_mutex);
670 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
671 inode, cfile->f_flags, full_path);
673 if (tcon->ses->server->oplocks)
678 if (tcon->unix_ext && cap_unix(tcon->ses) &&
679 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
680 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
682 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
683 * original open. Must mask them off for a reopen.
685 unsigned int oflags = cfile->f_flags &
686 ~(O_CREAT | O_EXCL | O_TRUNC);
688 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
689 cifs_sb->mnt_file_mode /* ignored */,
690 oflags, &oplock, &cfile->fid.netfid, xid);
692 cifs_dbg(FYI, "posix reopen succeeded\n");
693 oparms.reconnect = true;
697 * fallthrough to retry open the old way on errors, especially
698 * in the reconnect path it is important to retry hard
702 desired_access = cifs_convert_flags(cfile->f_flags);
704 if (backup_cred(cifs_sb))
705 create_options |= CREATE_OPEN_BACKUP_INTENT;
707 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
708 if (cfile->f_flags & O_SYNC)
709 create_options |= CREATE_WRITE_THROUGH;
711 if (cfile->f_flags & O_DIRECT)
712 create_options |= CREATE_NO_BUFFER;
714 if (server->ops->get_lease_key)
715 server->ops->get_lease_key(inode, &cfile->fid);
718 oparms.cifs_sb = cifs_sb;
719 oparms.desired_access = desired_access;
720 oparms.create_options = create_options;
721 oparms.disposition = disposition;
722 oparms.path = full_path;
723 oparms.fid = &cfile->fid;
724 oparms.reconnect = true;
727 * Can not refresh inode by passing in file_info buf to be returned by
728 * ops->open and then calling get_inode_info with returned buf since
729 * file might have write behind data that needs to be flushed and server
730 * version of file size can be stale. If we knew for sure that inode was
731 * not dirty locally we could do this.
733 rc = server->ops->open(xid, &oparms, &oplock, NULL);
734 if (rc == -ENOENT && oparms.reconnect == false) {
735 /* durable handle timeout is expired - open the file again */
736 rc = server->ops->open(xid, &oparms, &oplock, NULL);
737 /* indicate that we need to relock the file */
738 oparms.reconnect = true;
742 mutex_unlock(&cfile->fh_mutex);
743 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
744 cifs_dbg(FYI, "oplock: %d\n", oplock);
745 goto reopen_error_exit;
749 cfile->invalidHandle = false;
750 mutex_unlock(&cfile->fh_mutex);
751 cinode = CIFS_I(inode);
/* can_flush path: write back dirty pages, then refresh inode info */
754 rc = filemap_write_and_wait(inode->i_mapping);
755 mapping_set_error(inode->i_mapping, rc);
758 rc = cifs_get_inode_info_unix(&inode, full_path,
761 rc = cifs_get_inode_info(&inode, full_path, NULL,
762 inode->i_sb, xid, NULL);
765 * Else we are writing out data to server already and could deadlock if
766 * we tried to flush data, and since we do not know if we have data that
767 * would invalidate the current end of file on the server we can not go
768 * to the server to get the new inode info.
771 server->ops->set_fid(cfile, &cfile->fid, oplock);
772 if (oparms.reconnect)
773 cifs_relock_file(cfile);
/*
 * VFS ->release for regular files: drop this file's reference on the
 * cifsFileInfo (which may close the server handle on last put).
 */
781 int cifs_close(struct inode *inode, struct file *file)
783 if (file->private_data != NULL) {
784 cifsFileInfo_put(file->private_data);
785 file->private_data = NULL;
788 /* return code from the ->release op is always ignored */
/*
 * VFS ->release for directories: close the search handle on the server
 * if still needed, free any buffered readdir response, and release the
 * private data.
 */
792 int cifs_closedir(struct inode *inode, struct file *file)
796 struct cifsFileInfo *cfile = file->private_data;
797 struct cifs_tcon *tcon;
798 struct TCP_Server_Info *server;
801 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
807 tcon = tlink_tcon(cfile->tlink);
808 server = tcon->ses->server;
810 cifs_dbg(FYI, "Freeing private data in close dir\n");
811 spin_lock(&cfile->file_info_lock);
812 if (server->ops->dir_needs_close(cfile)) {
813 cfile->invalidHandle = true;
/* drop the spinlock before issuing the (sleeping) network close */
814 spin_unlock(&cfile->file_info_lock);
815 if (server->ops->close_dir)
816 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
819 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
820 /* not much we can do if it fails anyway, ignore rc */
823 spin_unlock(&cfile->file_info_lock);
/* free the last SMB response buffer held by the search state */
825 buf = cfile->srch_inf.ntwrk_buf_start;
827 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
828 cfile->srch_inf.ntwrk_buf_start = NULL;
829 if (cfile->srch_inf.smallBuf)
830 cifs_small_buf_release(buf);
832 cifs_buf_release(buf);
835 cifs_put_tlink(cfile->tlink);
836 kfree(file->private_data);
837 file->private_data = NULL;
838 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate and initialize a cifsLockInfo for the given byte range.
 * Records the locking task's tgid and sets up the blocked-waiters list
 * and wait queue.  Returns NULL on allocation failure (the NULL check
 * and return are missing from this extract).
 */
843 static struct cifsLockInfo *
844 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
846 struct cifsLockInfo *lock =
847 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
850 lock->offset = offset;
851 lock->length = length;
853 lock->pid = current->tgid;
854 INIT_LIST_HEAD(&lock->blist);
855 init_waitqueue_head(&lock->block_q);
/*
 * Wake every lock request blocked on @lock and unlink it from the
 * blocked list so waiters can re-check for conflicts.
 */
860 cifs_del_lock_waiters(struct cifsLockInfo *lock)
862 struct cifsLockInfo *li, *tmp;
863 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
864 list_del_init(&li->blist);
865 wake_up(&li->block_q);
/* operation kinds used for conflict checking in the brlock code */
869 #define CIFS_LOCK_OP 0
870 #define CIFS_READ_OP 1
871 #define CIFS_WRITE_OP 2
873 /* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's lock list for a range overlapping [offset, offset+length)
 * that conflicts with the requested @type/@rw_check; on conflict the
 * offending lock is presumably stored via *conf_lock (assignment lines
 * are missing from this extract).
 */
875 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
876 __u64 length, __u8 type, struct cifsFileInfo *cfile,
877 struct cifsLockInfo **conf_lock, int rw_check)
879 struct cifsLockInfo *li;
880 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
881 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
883 list_for_each_entry(li, &fdlocks->locks, llist) {
/* skip locks whose range does not overlap the requested one */
884 if (offset + length <= li->offset ||
885 offset >= li->offset + li->length)
887 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
888 server->ops->compare_fids(cfile, cur_cfile)) {
889 /* shared lock prevents write op through the same fid */
890 if (!(li->type & server->vals->shared_lock_type) ||
891 rw_check != CIFS_WRITE_OP)
894 if ((type & server->vals->shared_lock_type) &&
895 ((server->ops->compare_fids(cfile, cur_cfile) &&
896 current->tgid == li->pid) || type == li->type))
/*
 * Check every fid's lock list on the inode for a conflict with the
 * requested range, delegating to cifs_find_fid_lock_conflict.
 * Caller must hold cinode->lock_sem.
 */
906 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
907 __u8 type, struct cifsLockInfo **conf_lock,
911 struct cifs_fid_locks *cur;
912 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
914 list_for_each_entry(cur, &cinode->llist, llist) {
915 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
916 cfile, conf_lock, rw_check);
925 * Check if there is another lock that prevents us to set the lock (mandatory
926 * style). If such a lock exists, update the flock structure with its
927 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
928 * or leave it the same if we can't. Returns 0 if we don't need to request to
929 * the server or 1 otherwise.
932 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
933 __u8 type, struct file_lock *flock)
936 struct cifsLockInfo *conf_lock;
937 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
938 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
941 down_read(&cinode->lock_sem);
943 exist = cifs_find_lock_conflict(cfile, offset, length, type,
944 &conf_lock, CIFS_LOCK_OP);
/* conflict found: report the conflicting lock's properties to the caller */
946 flock->fl_start = conf_lock->offset;
947 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
948 flock->fl_pid = conf_lock->pid;
949 if (conf_lock->type & server->vals->shared_lock_type)
950 flock->fl_type = F_RDLCK;
952 flock->fl_type = F_WRLCK;
953 } else if (!cinode->can_cache_brlcks)
956 flock->fl_type = F_UNLCK;
958 up_read(&cinode->lock_sem);
/*
 * Unconditionally append a lock record to this fid's lock list,
 * serialized by the inode's lock_sem held for write.
 */
963 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
965 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
966 cifs_down_write(&cinode->lock_sem);
967 list_add_tail(&lock->llist, &cfile->llist->locks);
968 up_write(&cinode->lock_sem);
972 * Set the byte-range lock (mandatory style). Returns:
973 * 1) 0, if we set the lock and don't need to request to the server;
974 * 2) 1, if no locks prevent us but we need to request to the server;
975 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
978 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
981 struct cifsLockInfo *conf_lock;
982 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
988 cifs_down_write(&cinode->lock_sem);
990 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
991 lock->type, &conf_lock, CIFS_LOCK_OP);
992 if (!exist && cinode->can_cache_brlcks) {
993 list_add_tail(&lock->llist, &cfile->llist->locks);
994 up_write(&cinode->lock_sem);
/* conflict + wait: queue behind the conflicting lock and sleep until
   cifs_del_lock_waiters unlinks us (list becomes empty again) */
1003 list_add_tail(&lock->blist, &conf_lock->blist);
1004 up_write(&cinode->lock_sem);
1005 rc = wait_event_interruptible(lock->block_q,
1006 (lock->blist.prev == &lock->blist) &&
1007 (lock->blist.next == &lock->blist));
/* interrupted: re-take the sem and remove ourselves from the wait list */
1010 cifs_down_write(&cinode->lock_sem);
1011 list_del_init(&lock->blist);
1014 up_write(&cinode->lock_sem);
1019 * Check if there is another lock that prevents us to set the lock (posix
1020 * style). If such a lock exists, update the flock structure with its
1021 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1022 * or leave it the same if we can't. Returns 0 if we don't need to request to
1023 * the server or 1 otherwise.
1026 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1029 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1030 unsigned char saved_type = flock->fl_type;
/* only FL_POSIX locks are handled here */
1032 if ((flock->fl_flags & FL_POSIX) == 0)
1035 down_read(&cinode->lock_sem);
1036 posix_test_lock(file, flock);
/* can't trust a local F_UNLCK result unless brlocks are cached locally */
1038 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1039 flock->fl_type = saved_type;
1043 up_read(&cinode->lock_sem);
1048 * Set the byte-range lock (posix style). Returns:
1049 * 1) 0, if we set the lock and don't need to request to the server;
1050 * 2) 1, if we need to request to the server;
1051 * 3) <0, if the error occurs while setting the lock.
1054 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1056 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1059 if ((flock->fl_flags & FL_POSIX) == 0)
1063 cifs_down_write(&cinode->lock_sem);
/* not caching brlocks locally: caller must go to the server */
1064 if (!cinode->can_cache_brlcks) {
1065 up_write(&cinode->lock_sem);
1069 rc = posix_lock_file(file, flock, NULL);
1070 up_write(&cinode->lock_sem);
1071 if (rc == FILE_LOCK_DEFERRED) {
/* blocked on another posix lock: wait until it is released */
1072 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1075 posix_unblock_lock(flock);
/*
 * Push all cached mandatory byte-range locks for one fid to the server
 * using batched LOCKING_ANDX requests.  Exclusive and shared locks are
 * sent in two passes (types[0], types[1]), up to max_num ranges per
 * request as bounded by the server's negotiated maxBuf.
 */
1081 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1084 int rc = 0, stored_rc;
1085 struct cifsLockInfo *li, *tmp;
1086 struct cifs_tcon *tcon;
1087 unsigned int num, max_num, max_buf;
1088 LOCKING_ANDX_RANGE *buf, *cur;
1089 int types[] = {LOCKING_ANDX_LARGE_FILES,
1090 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1094 tcon = tlink_tcon(cfile->tlink);
1097 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1098 * and check it before using.
1100 max_buf = tcon->ses->server->maxBuf;
1101 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1106 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1108 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1110 max_num = (max_buf - sizeof(struct smb_hdr)) /
1111 sizeof(LOCKING_ANDX_RANGE);
1112 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1118 for (i = 0; i < 2; i++) {
1121 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1122 if (li->type != types[i])
/* fill one LOCKING_ANDX_RANGE entry (64-bit split into lo/hi halves) */
1124 cur->Pid = cpu_to_le16(li->pid);
1125 cur->LengthLow = cpu_to_le32((u32)li->length);
1126 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1127 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1128 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
/* buffer full: flush this batch to the server */
1129 if (++num == max_num) {
1130 stored_rc = cifs_lockv(xid, tcon,
1132 (__u8)li->type, 0, num,
/* send any remaining partial batch for this lock type */
1143 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1144 (__u8)types[i], 0, num, buf);
/*
 * One posix lock queued for pushing to the server; linked into the
 * locks_to_send list in cifs_push_posix_locks.
 * NOTE(review): remaining members (offset, length, pid, netfid, type)
 * are missing from this extract - confirm against full source.
 */
1155 struct lock_to_push {
1156 struct list_head llist;
/*
 * Push all cached posix (FL_POSIX) locks on the inode to the server.
 * Preallocates one lock_to_push per lock counted under flc_lock (the
 * count cannot grow while lock_sem is held), then walks the list again
 * to fill the records, and finally sends each with CIFSSMBPosixLock.
 */
1165 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1167 struct inode *inode = d_inode(cfile->dentry);
1168 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1169 struct file_lock *flock;
1170 struct file_lock_context *flctx = inode->i_flctx;
1171 unsigned int count = 0, i;
1172 int rc = 0, xid, type;
1173 struct list_head locks_to_send, *el;
1174 struct lock_to_push *lck, *tmp;
/* first pass: count the posix locks to know how many records to allocate */
1182 spin_lock(&flctx->flc_lock);
1183 list_for_each(el, &flctx->flc_posix) {
1186 spin_unlock(&flctx->flc_lock);
1188 INIT_LIST_HEAD(&locks_to_send);
1191 * Allocating count locks is enough because no FL_POSIX locks can be
1192 * added to the list while we are holding cinode->lock_sem that
1193 * protects locking operations of this inode.
1195 for (i = 0; i < count; i++) {
1196 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1201 list_add_tail(&lck->llist, &locks_to_send);
1204 el = locks_to_send.next;
1205 spin_lock(&flctx->flc_lock);
1206 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1207 if (el == &locks_to_send) {
1209 * The list ended. We don't have enough allocated
1210 * structures - something is really wrong.
1212 cifs_dbg(VFS, "Can't push all brlocks!\n");
1215 length = 1 + flock->fl_end - flock->fl_start;
1216 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1220 lck = list_entry(el, struct lock_to_push, llist);
1221 lck->pid = flock->fl_pid;
1222 lck->netfid = cfile->fid.netfid;
1223 lck->length = length;
1225 lck->offset = flock->fl_start;
1227 spin_unlock(&flctx->flc_lock);
/* send each prepared lock; free records as they are consumed */
1229 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1232 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1233 lck->offset, lck->length, NULL,
1237 list_del(&lck->llist);
/* error path: free any records that were never sent */
1245 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1246 list_del(&lck->llist);
/*
 * Flush all locally cached byte-range locks to the server (posix or
 * mandatory style depending on server capabilities and mount options)
 * and clear can_cache_brlcks so future locks go straight to the server.
 */
1253 cifs_push_locks(struct cifsFileInfo *cfile)
1255 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1256 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1257 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1260 /* we are going to update can_cache_brlcks here - need a write access */
1261 cifs_down_write(&cinode->lock_sem);
1262 if (!cinode->can_cache_brlcks) {
1263 up_write(&cinode->lock_sem);
1267 if (cap_unix(tcon->ses) &&
1268 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1269 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1270 rc = cifs_push_posix_locks(cfile);
1272 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1274 cinode->can_cache_brlcks = false;
1275 up_write(&cinode->lock_sem);
/*
 * Decode a struct file_lock into the server-specific lock *type mask
 * and (via the out parameters) the lock/unlock/wait disposition.
 * Logs each recognized fl_flags bit and fl_type at FYI level.
 */
1280 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1281 bool *wait_flag, struct TCP_Server_Info *server)
1283 if (flock->fl_flags & FL_POSIX)
1284 cifs_dbg(FYI, "Posix\n");
1285 if (flock->fl_flags & FL_FLOCK)
1286 cifs_dbg(FYI, "Flock\n");
1287 if (flock->fl_flags & FL_SLEEP) {
1288 cifs_dbg(FYI, "Blocking lock\n");
1291 if (flock->fl_flags & FL_ACCESS)
1292 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1293 if (flock->fl_flags & FL_LEASE)
1294 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1295 if (flock->fl_flags &
1296 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1297 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1298 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1300 *type = server->vals->large_lock_type;
1301 if (flock->fl_type == F_WRLCK) {
1302 cifs_dbg(FYI, "F_WRLCK\n");
1303 *type |= server->vals->exclusive_lock_type;
1305 } else if (flock->fl_type == F_UNLCK) {
1306 cifs_dbg(FYI, "F_UNLCK\n");
1307 *type |= server->vals->unlock_lock_type;
1309 /* Check if unlock includes more than one lock range */
1310 } else if (flock->fl_type == F_RDLCK) {
1311 cifs_dbg(FYI, "F_RDLCK\n");
1312 *type |= server->vals->shared_lock_type;
1314 } else if (flock->fl_type == F_EXLCK) {
1315 cifs_dbg(FYI, "F_EXLCK\n");
1316 *type |= server->vals->exclusive_lock_type;
1318 } else if (flock->fl_type == F_SHLCK) {
1319 cifs_dbg(FYI, "F_SHLCK\n");
1320 *type |= server->vals->shared_lock_type;
1323 cifs_dbg(FYI, "Unknown type of lock\n");
/*
 * Handle F_GETLK: test whether the requested range conflicts with an
 * existing lock.  For POSIX-capable mounts this is a posix lock test;
 * otherwise a mandatory lock is tried and (if it succeeded) immediately
 * unlocked, and flock->fl_type is updated to report the result.
 */
1327 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1328 bool wait_flag, bool posix_lck, unsigned int xid)
1331 __u64 length = 1 + flock->fl_end - flock->fl_start;
1332 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1333 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1334 struct TCP_Server_Info *server = tcon->ses->server;
1335 __u16 netfid = cfile->fid.netfid;
1338 int posix_lock_type;
/* first check our own cached locks */
1340 rc = cifs_posix_lock_test(file, flock);
1344 if (type & server->vals->shared_lock_type)
1345 posix_lock_type = CIFS_RDLCK;
1347 posix_lock_type = CIFS_WRLCK;
1348 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1349 flock->fl_start, length, flock,
1350 posix_lock_type, wait_flag);
1354 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1358 /* BB we could chain these into one lock request BB */
/* probe: try to take the lock, then undo it if it was granted */
1359 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1362 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1364 flock->fl_type = F_UNLCK;
1366 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
/* exclusive probe failed; retry as shared to distinguish rd/wr conflict */
1371 if (type & server->vals->shared_lock_type) {
1372 flock->fl_type = F_WRLCK;
1376 type &= ~server->vals->exclusive_lock_type;
1378 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1379 type | server->vals->shared_lock_type,
1382 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1383 type | server->vals->shared_lock_type, 0, 1, false);
1384 flock->fl_type = F_RDLCK;
1386 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1389 flock->fl_type = F_WRLCK;
/* Move every entry from @source onto @dest (used to restore/commit the
 * temporary unlock list in cifs_unlock_range). */
1395 cifs_move_llist(struct list_head *source, struct list_head *dest)
1397 struct list_head *li, *tmp;
1398 list_for_each_safe(li, tmp, source)
1399 list_move(li, dest);
/* Release every cifsLockInfo on @llist: wake any waiters blocked on the
 * lock, then unlink (and presumably free — kfree not visible here) it. */
1403 cifs_free_llist(struct list_head *llist)
1405 struct cifsLockInfo *li, *tmp;
1406 list_for_each_entry_safe(li, tmp, llist, llist) {
1407 cifs_del_lock_waiters(li);
1408 list_del(&li->llist);
/*
 * Unlock every cached mandatory lock falling inside the range described
 * by @flock.  Matching locks are batched into LOCKING_ANDX_RANGE arrays
 * (bounded by the server's maxBuf) and sent via cifs_lockv; candidates
 * are parked on tmp_llist so they can be re-added to the file's list if
 * the wire request fails.  Two passes: one per lock type in types[].
 */
1414 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1417 int rc = 0, stored_rc;
1418 int types[] = {LOCKING_ANDX_LARGE_FILES,
1419 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1421 unsigned int max_num, num, max_buf;
1422 LOCKING_ANDX_RANGE *buf, *cur;
1423 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1424 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1425 struct cifsLockInfo *li, *tmp;
1426 __u64 length = 1 + flock->fl_end - flock->fl_start;
1427 struct list_head tmp_llist;
1429 INIT_LIST_HEAD(&tmp_llist);
1432 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1433 * and check it before using.
1435 max_buf = tcon->ses->server->maxBuf;
1436 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1439 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1441 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
/* how many lock ranges fit in one request */
1443 max_num = (max_buf - sizeof(struct smb_hdr)) /
1444 sizeof(LOCKING_ANDX_RANGE);
1445 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1449 cifs_down_write(&cinode->lock_sem);
1450 for (i = 0; i < 2; i++) {
1453 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* skip locks not fully contained in the unlock range */
1454 if (flock->fl_start > li->offset ||
1455 (flock->fl_start + length) <
1456 (li->offset + li->length))
1458 if (current->tgid != li->pid)
1460 if (types[i] != li->type)
1462 if (cinode->can_cache_brlcks) {
1464 * We can cache brlock requests - simply remove
1465 * a lock from the file's list.
1467 list_del(&li->llist);
1468 cifs_del_lock_waiters(li);
1472 cur->Pid = cpu_to_le16(li->pid);
1473 cur->LengthLow = cpu_to_le32((u32)li->length);
1474 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1475 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1476 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1478 * We need to save a lock here to let us add it again to
1479 * the file's list if the unlock range request fails on
1482 list_move(&li->llist, &tmp_llist);
/* buffer full - flush this batch to the server */
1483 if (++num == max_num) {
1484 stored_rc = cifs_lockv(xid, tcon,
1486 li->type, num, 0, buf);
1489 * We failed on the unlock range
1490 * request - add all locks from the tmp
1491 * list to the head of the file's list.
1493 cifs_move_llist(&tmp_llist,
1494 &cfile->llist->locks);
1498 * The unlock range request succeed -
1499 * free the tmp list.
1501 cifs_free_llist(&tmp_llist);
/* send any final partial batch for this type */
1508 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1509 types[i], num, 0, buf);
1511 cifs_move_llist(&tmp_llist,
1512 &cfile->llist->locks);
1515 cifs_free_llist(&tmp_llist);
1519 up_write(&cinode->lock_sem);
/*
 * Handle F_SETLK/F_SETLKW: set or clear a byte-range lock.  POSIX-capable
 * mounts go through CIFSSMBPosixLock; otherwise a mandatory lock is
 * cached (while caching is allowed) or sent to the server, and unlocks go
 * through mand_unlock_range.  Finally the local VFS lock state is updated
 * via locks_lock_file_wait.
 */
1525 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1526 bool wait_flag, bool posix_lck, int lock, int unlock,
1530 __u64 length = 1 + flock->fl_end - flock->fl_start;
1531 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1532 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1533 struct TCP_Server_Info *server = tcon->ses->server;
1534 struct inode *inode = d_inode(cfile->dentry);
1537 int posix_lock_type;
/* record the lock locally first */
1539 rc = cifs_posix_lock_set(file, flock);
1543 if (type & server->vals->shared_lock_type)
1544 posix_lock_type = CIFS_RDLCK;
1546 posix_lock_type = CIFS_WRLCK;
1549 posix_lock_type = CIFS_UNLCK;
1551 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1552 current->tgid, flock->fl_start, length,
1553 NULL, posix_lock_type, wait_flag);
1558 struct cifsLockInfo *lock;
1560 lock = cifs_lock_init(flock->fl_start, length, type);
/* check for conflicts with already-cached locks */
1564 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1573 * Windows 7 server can delay breaking lease from read to None
1574 * if we set a byte-range lock on a file - break it explicitly
1575 * before sending the lock to the server to be sure the next
1576 * read won't conflict with non-overlapted locks due to
1579 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1580 CIFS_CACHE_READ(CIFS_I(inode))) {
1581 cifs_zap_mapping(inode);
1582 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1584 CIFS_I(inode)->oplock = 0;
1587 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1588 type, 1, 0, wait_flag);
1594 cifs_lock_add(cfile, lock);
1596 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1599 if (flock->fl_flags & FL_POSIX) {
1601 * If this is a request to remove all locks because we
1602 * are closing the file, it doesn't matter if the
1603 * unlocking failed as both cifs.ko and the SMB server
1604 * remove the lock on file close
1607 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1608 if (!(flock->fl_flags & FL_CLOSE))
1611 rc = locks_lock_file_wait(file, flock);
/*
 * VFS .lock entry point: parse the request with cifs_read_flock, decide
 * whether POSIX (unix extension) locks apply, then dispatch to
 * cifs_getlk for F_GETLK or cifs_setlk for set/unset requests.
 */
1616 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1619 int lock = 0, unlock = 0;
1620 bool wait_flag = false;
1621 bool posix_lck = false;
1622 struct cifs_sb_info *cifs_sb;
1623 struct cifs_tcon *tcon;
1624 struct cifsInodeInfo *cinode;
1625 struct cifsFileInfo *cfile;
1632 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1633 cmd, flock->fl_flags, flock->fl_type,
1634 flock->fl_start, flock->fl_end);
1636 cfile = (struct cifsFileInfo *)file->private_data;
1637 tcon = tlink_tcon(cfile->tlink);
1639 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1642 cifs_sb = CIFS_FILE_SB(file);
1643 netfid = cfile->fid.netfid;
1644 cinode = CIFS_I(file_inode(file));
/* posix brlocks only when unix extensions allow and mount permits */
1646 if (cap_unix(tcon->ses) &&
1647 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1648 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1651 * BB add code here to normalize offset and length to account for
1652 * negative length which we can not accept over the wire.
1654 if (IS_GETLK(cmd)) {
1655 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1660 if (!lock && !unlock) {
1662 * if no lock or unlock then nothing to do since we do not
1669 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1676 * update the file size (if needed) after a write. Should be called with
1677 * the inode->i_lock held
1680 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1681 unsigned int bytes_written)
1683 loff_t end_of_write = offset + bytes_written;
/* server_eof only ever grows here; never shrink it on a short write */
1685 if (end_of_write > cifsi->server_eof)
1686 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data at *offset using
 * the server's sync_write op, retrying -EAGAIN (reopening an invalid
 * handle first) and chunking by the server's wp_retry_size.  Updates
 * *offset, the cached server EOF and i_size.  Returns the total number of
 * bytes written (error paths not fully visible in this view).
 */
1690 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1691 size_t write_size, loff_t *offset)
1694 unsigned int bytes_written = 0;
1695 unsigned int total_written;
1696 struct cifs_sb_info *cifs_sb;
1697 struct cifs_tcon *tcon;
1698 struct TCP_Server_Info *server;
1700 struct dentry *dentry = open_file->dentry;
1701 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1702 struct cifs_io_parms io_parms;
1704 cifs_sb = CIFS_SB(dentry->d_sb);
1706 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1707 write_size, *offset, dentry);
1709 tcon = tlink_tcon(open_file->tlink);
1710 server = tcon->ses->server;
1712 if (!server->ops->sync_write)
1717 for (total_written = 0; write_size > total_written;
1718 total_written += bytes_written) {
1720 while (rc == -EAGAIN) {
1724 if (open_file->invalidHandle) {
1725 /* we could deadlock if we called
1726 filemap_fdatawait from here so tell
1727 reopen_file not to flush data to
1729 rc = cifs_reopen_file(open_file, false);
1734 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1735 (unsigned int)write_size - total_written);
1736 /* iov[0] is reserved for smb header */
1737 iov[1].iov_base = (char *)write_data + total_written;
1738 iov[1].iov_len = len;
1740 io_parms.tcon = tcon;
1741 io_parms.offset = *offset;
1742 io_parms.length = len;
1743 rc = server->ops->sync_write(xid, &open_file->fid,
1744 &io_parms, &bytes_written, iov, 1);
1746 if (rc || (bytes_written == 0)) {
/* advance the cached server EOF under i_lock */
1754 spin_lock(&d_inode(dentry)->i_lock);
1755 cifs_update_eof(cifsi, *offset, bytes_written);
1756 spin_unlock(&d_inode(dentry)->i_lock);
1757 *offset += bytes_written;
1761 cifs_stats_bytes_written(tcon, total_written);
1763 if (total_written > 0) {
1764 spin_lock(&d_inode(dentry)->i_lock);
1765 if (*offset > d_inode(dentry)->i_size)
1766 i_size_write(d_inode(dentry), *offset);
1767 spin_unlock(&d_inode(dentry)->i_lock);
1769 mark_inode_dirty_sync(d_inode(dentry));
1771 return total_written;
/*
 * Find an open handle on this inode with read access, optionally filtered
 * by the current fsuid (multiuser mounts only).  On success the handle's
 * refcount is raised (caller must cifsFileInfo_put); returns NULL when no
 * valid readable handle exists.
 */
1774 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1777 struct cifsFileInfo *open_file = NULL;
1778 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1779 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1781 /* only filter by fsuid on multiuser mounts */
1782 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1785 spin_lock(&tcon->open_file_lock);
1786 /* we could simply get the first_list_entry since write-only entries
1787 are always at the end of the list but since the first entry might
1788 have a close pending, we go through the whole list */
1789 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1790 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1792 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1793 if (!open_file->invalidHandle) {
1794 /* found a good file */
1795 /* lock it so it will not be closed on us */
1796 cifsFileInfo_get(open_file);
1797 spin_unlock(&tcon->open_file_lock);
1799 } /* else might as well continue, and look for
1800 another, or simply have the caller reopen it
1801 again rather than trying to fix this handle */
1802 } else /* write only file */
1803 break; /* write only files are last so must be done */
1805 spin_unlock(&tcon->open_file_lock);
/*
 * Find an open handle on this inode with write access.  Prefers a valid
 * handle belonging to the current tgid, then any valid handle; as a last
 * resort tries to reopen an invalidated handle (up to MAX_REOPEN_ATT
 * times).  Returns a referenced handle or NULL.
 */
1809 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1812 struct cifsFileInfo *open_file, *inv_file = NULL;
1813 struct cifs_sb_info *cifs_sb;
1814 struct cifs_tcon *tcon;
1815 bool any_available = false;
1817 unsigned int refind = 0;
1819 /* Having a null inode here (because mapping->host was set to zero by
1820 the VFS or MM) should not happen but we had reports of an oops (due to
1821 it being zero) during stress testcases so we need to check for it */
1823 if (cifs_inode == NULL) {
1824 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1829 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1830 tcon = cifs_sb_master_tcon(cifs_sb);
1832 /* only filter by fsuid on multiuser mounts */
1833 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1836 spin_lock(&tcon->open_file_lock);
/* give up after too many reopen attempts */
1838 if (refind > MAX_REOPEN_ATT) {
1839 spin_unlock(&tcon->open_file_lock);
1842 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1843 if (!any_available && open_file->pid != current->tgid)
1845 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1847 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1848 if (!open_file->invalidHandle) {
1849 /* found a good writable file */
1850 cifsFileInfo_get(open_file);
1851 spin_unlock(&tcon->open_file_lock);
/* remember an invalid handle in case nothing better shows up */
1855 inv_file = open_file;
1859 /* couldn't find useable FH with same pid, try any available */
1860 if (!any_available) {
1861 any_available = true;
1862 goto refind_writable;
1866 any_available = false;
1867 cifsFileInfo_get(inv_file);
1870 spin_unlock(&tcon->open_file_lock);
/* attempt to revive the invalidated handle */
1873 rc = cifs_reopen_file(inv_file, false);
1877 spin_lock(&tcon->open_file_lock);
1878 list_move_tail(&inv_file->flist,
1879 &cifs_inode->openFileList);
1880 spin_unlock(&tcon->open_file_lock);
1881 cifsFileInfo_put(inv_file);
1884 spin_lock(&tcon->open_file_lock);
1885 goto refind_writable;
/*
 * Write back bytes [from, to) of a dirty page via a writable handle on
 * the inode.  Clamps the range so it never extends the file, tolerates a
 * racing truncate, and updates the inode timestamps after a successful
 * write.
 */
1892 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1894 struct address_space *mapping = page->mapping;
1895 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1898 int bytes_written = 0;
1899 struct inode *inode;
1900 struct cifsFileInfo *open_file;
1902 if (!mapping || !mapping->host)
1905 inode = page->mapping->host;
1907 offset += (loff_t)from;
1908 write_data = kmap(page);
/* sanity-check the requested byte range within the page */
1911 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1916 /* racing with truncate? */
1917 if (offset > mapping->host->i_size) {
1919 return 0; /* don't care */
1922 /* check to make sure that we are not extending the file */
1923 if (mapping->host->i_size - offset < (loff_t)to)
1924 to = (unsigned)(mapping->host->i_size - offset);
1926 open_file = find_writable_file(CIFS_I(mapping->host), false);
1928 bytes_written = cifs_write(open_file, open_file->pid,
1929 write_data, to - from, &offset);
1930 cifsFileInfo_put(open_file);
1931 /* Does mm or vfs already set times? */
1932 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1933 if ((bytes_written > 0) && (offset))
1935 else if (bytes_written < 0)
1938 cifs_dbg(FYI, "No writeable filehandles for inode\n")
/*
 * Allocate a cifs_writedata for up to @tofind pages and fill its page
 * array with dirty pages from @mapping starting at *index.  Loops because
 * find_get_pages_tag may return fewer pages than requested per call.
 * *found_pages is incremented by the number of pages collected.
 */
1946 static struct cifs_writedata *
1947 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1948 pgoff_t end, pgoff_t *index,
1949 unsigned int *found_pages)
1951 unsigned int nr_pages;
1952 struct page **pages;
1953 struct cifs_writedata *wdata;
1955 wdata = cifs_writedata_alloc((unsigned int)tofind,
1956 cifs_writev_complete);
1961 * find_get_pages_tag seems to return a max of 256 on each
1962 * iteration, so we must call it several times in order to
1963 * fill the array or the wsize is effectively limited to
1964 * 256 * PAGE_CACHE_SIZE.
1967 pages = wdata->pages;
1969 nr_pages = find_get_pages_tag(mapping, index,
1970 PAGECACHE_TAG_DIRTY, tofind,
1972 *found_pages += nr_pages;
1975 } while (nr_pages && tofind && *index <= end);
/*
 * From the @found_pages candidates in @wdata, lock and mark-for-writeback
 * the longest run of consecutive, still-dirty, in-range pages.  Pages
 * that fail the checks (truncated, out of range, non-consecutive, already
 * under writeback, past EOF) end the run; unused pages are released.
 * Returns the number of pages kept (nr_pages).
 */
1981 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1982 struct address_space *mapping,
1983 struct writeback_control *wbc,
1984 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1986 unsigned int nr_pages = 0, i;
1989 for (i = 0; i < found_pages; i++) {
1990 page = wdata->pages[i];
1992 * At this point we hold neither mapping->tree_lock nor
1993 * lock on the page itself: the page may be truncated or
1994 * invalidated (changing page->mapping to NULL), or even
1995 * swizzled back from swapper_space to tmpfs file
2001 else if (!trylock_page(page))
2004 if (unlikely(page->mapping != mapping)) {
2009 if (!wbc->range_cyclic && page->index > end) {
2015 if (*next && (page->index != *next)) {
2016 /* Not next consecutive page */
2021 if (wbc->sync_mode != WB_SYNC_NONE)
2022 wait_on_page_writeback(page);
2024 if (PageWriteback(page) ||
2025 !clear_page_dirty_for_io(page)) {
2031 * This actually clears the dirty bit in the radix tree.
2032 * See cifs_writepage() for more commentary.
2034 set_page_writeback(page);
/* page fully beyond EOF - nothing to write for it */
2035 if (page_offset(page) >= i_size_read(mapping->host)) {
2038 end_page_writeback(page);
2042 wdata->pages[i] = page;
2043 *next = page->index + 1;
2047 /* reset index to refind any pages skipped */
2049 *index = wdata->pages[0]->index + 1;
2051 /* put any pages we aren't going to use */
2052 for (i = nr_pages; i < found_pages; i++) {
2053 page_cache_release(wdata->pages[i]);
2054 wdata->pages[i] = NULL;
/*
 * Fill in the remaining cifs_writedata fields (offset, sizes, tail size
 * clamped to EOF) and submit it via the server's async_writev op using a
 * freshly found writable handle.  On failure the pages are unlocked so
 * writeback can retry them.
 */
2061 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2062 struct address_space *mapping, struct writeback_control *wbc)
2065 struct TCP_Server_Info *server;
2068 wdata->sync_mode = wbc->sync_mode;
2069 wdata->nr_pages = nr_pages;
2070 wdata->offset = page_offset(wdata->pages[0]);
2071 wdata->pagesz = PAGE_CACHE_SIZE;
/* last page may be partial if EOF falls inside it */
2072 wdata->tailsz = min(i_size_read(mapping->host) -
2073 page_offset(wdata->pages[nr_pages - 1]),
2074 (loff_t)PAGE_CACHE_SIZE);
2075 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
2077 if (wdata->cfile != NULL)
2078 cifsFileInfo_put(wdata->cfile);
2079 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2080 if (!wdata->cfile) {
2081 cifs_dbg(VFS, "No writable handles for inode\n");
2084 wdata->pid = wdata->cfile->pid;
2085 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2086 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2089 for (i = 0; i < nr_pages; ++i)
2090 unlock_page(wdata->pages[i]);
/*
 * address_space .writepages: gather runs of dirty pages and push them to
 * the server as large async writes.  Falls back to generic_writepages
 * when wsize is smaller than a page.  Per iteration: reserve wsize-worth
 * of credits, collect and prepare pages, send; on send failure the pages
 * are redirtied (for -EAGAIN) or errored, and a WB_SYNC_ALL -EAGAIN
 * restarts from the saved index.
 */
2095 static int cifs_writepages(struct address_space *mapping,
2096 struct writeback_control *wbc)
2098 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2099 struct TCP_Server_Info *server;
2100 bool done = false, scanned = false, range_whole = false;
2102 struct cifs_writedata *wdata;
2106 * If wsize is smaller than the page cache size, default to writing
2107 * one page at a time via cifs_writepage
2109 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2110 return generic_writepages(mapping, wbc);
2112 if (wbc->range_cyclic) {
2113 index = mapping->writeback_index; /* Start from prev offset */
2116 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2117 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2118 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2122 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2124 while (!done && index <= end) {
2125 unsigned int i, nr_pages, found_pages, wsize, credits;
2126 pgoff_t next = 0, tofind, saved_index = index;
/* block until the server grants credits for a wsize write */
2128 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2133 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2135 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2139 add_credits_and_wake_if(server, credits, 0);
2143 if (found_pages == 0) {
2144 kref_put(&wdata->refcount, cifs_writedata_release);
2145 add_credits_and_wake_if(server, credits, 0);
2149 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2150 end, &index, &next, &done);
2152 /* nothing to write? */
2153 if (nr_pages == 0) {
2154 kref_put(&wdata->refcount, cifs_writedata_release);
2155 add_credits_and_wake_if(server, credits, 0);
2159 wdata->credits = credits;
2161 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2163 /* send failure -- clean up the mess */
2165 add_credits_and_wake_if(server, wdata->credits, 0);
2166 for (i = 0; i < nr_pages; ++i) {
2168 redirty_page_for_writepage(wbc,
2171 SetPageError(wdata->pages[i]);
2172 end_page_writeback(wdata->pages[i]);
2173 page_cache_release(wdata->pages[i]);
2176 mapping_set_error(mapping, rc);
2178 kref_put(&wdata->refcount, cifs_writedata_release);
2180 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2181 index = saved_index;
2185 wbc->nr_to_write -= nr_pages;
2186 if (wbc->nr_to_write <= 0)
2192 if (!scanned && !done) {
2194 * We hit the last page and there is more work to be done: wrap
2195 * back to the start of the file
2202 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2203 mapping->writeback_index = index;
/*
 * Write back a single locked page via cifs_partialpagewrite.  -EAGAIN is
 * retried for WB_SYNC_ALL, otherwise the page is redirtied for a later
 * pass.  Holds an extra page reference for the duration.
 */
2209 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2215 /* BB add check for wbc flags */
2216 page_cache_get(page);
2217 if (!PageUptodate(page))
2218 cifs_dbg(FYI, "ppw - page not up to date\n");
2221 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2223 * A writepage() implementation always needs to do either this,
2224 * or re-dirty the page with "redirty_page_for_writepage()" in
2225 * the case of a failure.
2227 * Just unlocking the page will cause the radix tree tag-bits
2228 * to fail to update with the state of the page correctly.
2230 set_page_writeback(page);
2232 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2233 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2235 else if (rc == -EAGAIN)
2236 redirty_page_for_writepage(wbc, page);
2240 SetPageUptodate(page);
2241 end_page_writeback(page);
2242 page_cache_release(page);
/* address_space .writepage: thin wrapper around cifs_writepage_locked
 * (page unlock on the missing lines is presumed — not visible here). */
2247 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2249 int rc = cifs_writepage_locked(page, wbc);
/*
 * address_space .write_end: finish a buffered write of @copied bytes at
 * @pos.  Uptodate pages are just marked dirty; a non-uptodate partial
 * page is written straight to the server via cifs_write using this
 * file's handle.  Extends i_size under i_lock when the write went past
 * the old size.
 */
2254 static int cifs_write_end(struct file *file, struct address_space *mapping,
2255 loff_t pos, unsigned len, unsigned copied,
2256 struct page *page, void *fsdata)
2259 struct inode *inode = mapping->host;
2260 struct cifsFileInfo *cfile = file->private_data;
2261 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* pick pid for the wire request: forwarded opener pid or our tgid */
2264 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2267 pid = current->tgid;
2269 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2272 if (PageChecked(page)) {
2274 SetPageUptodate(page);
2275 ClearPageChecked(page);
2276 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2277 SetPageUptodate(page);
2279 if (!PageUptodate(page)) {
2281 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2285 /* this is probably better than directly calling
2286 partialpage_write since in this function the file handle is
2287 known which we might as well leverage */
2288 /* BB check if anything else missing out of ppw
2289 such as updating last write time */
2290 page_data = kmap(page);
2291 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2292 /* if (rc < 0) should we set writebehind rc? */
2299 set_page_dirty(page);
2303 spin_lock(&inode->i_lock);
2304 if (pos > inode->i_size)
2305 i_size_write(inode, pos);
2306 spin_unlock(&inode->i_lock);
2310 page_cache_release(page);
/*
 * fsync for "strict" cache mode: flush dirty pages in [start, end], zap
 * the page cache when we no longer hold a read oplock/lease (so stale
 * cached data can't be re-read), then ask the server to flush unless the
 * nostrictsync mount option is set.
 */
2315 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2320 struct cifs_tcon *tcon;
2321 struct TCP_Server_Info *server;
2322 struct cifsFileInfo *smbfile = file->private_data;
2323 struct inode *inode = file_inode(file);
2324 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2326 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2329 mutex_lock(&inode->i_mutex);
2333 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2336 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2337 rc = cifs_zap_mapping(inode);
2339 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2340 rc = 0; /* don't care about it in fsync */
2344 tcon = tlink_tcon(smbfile->tlink);
2345 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2346 server = tcon->ses->server;
2347 if (server->ops->flush)
2348 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2354 mutex_unlock(&inode->i_mutex);
/*
 * Non-strict fsync: flush dirty pages in [start, end] and ask the server
 * to flush its buffers for this handle (unless nostrictsync is set).
 * Unlike cifs_strict_fsync, the page cache is not invalidated.
 */
2358 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2362 struct cifs_tcon *tcon;
2363 struct TCP_Server_Info *server;
2364 struct cifsFileInfo *smbfile = file->private_data;
2365 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2366 struct inode *inode = file->f_mapping->host;
2368 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2371 mutex_lock(&inode->i_mutex);
2375 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2378 tcon = tlink_tcon(smbfile->tlink);
2379 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2380 server = tcon->ses->server;
2381 if (server->ops->flush)
2382 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2388 mutex_unlock(&inode->i_mutex);
2393 * As file closes, flush all cached write data for this inode checking
2394 * for write behind errors.
2396 int cifs_flush(struct file *file, fl_owner_t id)
2398 struct inode *inode = file_inode(file);
/* only files opened for write can have dirty cached data */
2401 if (file->f_mode & FMODE_WRITE)
2402 rc = filemap_write_and_wait(inode->i_mapping);
2404 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
/*
 * Allocate @num_pages highmem-capable pages into @pages.  On allocation
 * failure the count of pages already obtained is recorded (on lines not
 * visible here) and ENOMEM is returned; the cleanup loop frees them.
 */
2410 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2415 for (i = 0; i < num_pages; i++) {
2416 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2419 * save number of pages we have already allocated and
2420 * return with ENOMEM error
2429 for (i = 0; i < num_pages; i++)
/* Clamp @len to @wsize, store the chunk length in *cur_len and return the
 * number of pages needed to hold it. */
2436 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2441 clen = min_t(const size_t, len, wsize);
2442 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/* kref release for uncached (direct) writedata: drop the page references
 * we took for the copy, then release the writedata itself. */
2451 cifs_uncached_writedata_release(struct kref *refcount)
2454 struct cifs_writedata *wdata = container_of(refcount,
2455 struct cifs_writedata, refcount);
2457 for (i = 0; i < wdata->nr_pages; i++)
2458 put_page(wdata->pages[i]);
2459 cifs_writedata_release(refcount);
/*
 * Completion work for an uncached async write: grow the cached server
 * EOF and i_size under i_lock, then signal the waiter in
 * cifs_user_writev and drop our writedata reference.
 */
2463 cifs_uncached_writev_complete(struct work_struct *work)
2465 struct cifs_writedata *wdata = container_of(work,
2466 struct cifs_writedata, work);
2467 struct inode *inode = d_inode(wdata->cfile->dentry);
2468 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2470 spin_lock(&inode->i_lock);
2471 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2472 if (cifsi->server_eof > inode->i_size)
2473 i_size_write(inode, cifsi->server_eof);
2474 spin_unlock(&inode->i_lock);
2476 complete(&wdata->done);
2478 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Copy up to *len bytes from the user iov_iter into wdata's pages.
 * Stops early on a short copy (likely an unmapped user address); on
 * return *len is the number of bytes actually copied and *num_pages the
 * number of pages used.  Returns -EFAULT if nothing could be copied.
 */
2482 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2483 size_t *len, unsigned long *num_pages)
2485 size_t save_len, copied, bytes, cur_len = *len;
2486 unsigned long i, nr_pages = *num_pages;
2489 for (i = 0; i < nr_pages; i++) {
2490 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2491 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2494 * If we didn't copy as much as we expected, then that
2495 * may mean we trod into an unmapped area. Stop copying
2496 * at that point. On the next pass through the big
2497 * loop, we'll likely end up getting a zero-length
2498 * write and bailing out of it.
2503 cur_len = save_len - cur_len;
2507 * If we have no data to send, then that probably means that
2508 * the copy above failed altogether. That's most likely because
2509 * the address in the iovec was bogus. Return -EFAULT and let
2510 * the caller free anything we allocated and bail out.
2516 * i + 1 now represents the number of pages we actually used in
2517 * the copy phase above.
/*
 * Split an uncached write of @len bytes at @offset into wsize-bounded
 * async requests.  Each iteration reserves credits, allocates pages,
 * copies user data in, and submits via async_writev; successful sends
 * are queued on @wdata_list for the caller to wait on.  On -EAGAIN the
 * iterator is rewound from @saved_from so the chunk can be resent.
 */
2524 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2525 struct cifsFileInfo *open_file,
2526 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2530 unsigned long nr_pages, num_pages, i;
2531 struct cifs_writedata *wdata;
2532 struct iov_iter saved_from;
2533 loff_t saved_offset = offset;
2535 struct TCP_Server_Info *server;
2537 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2538 pid = open_file->pid;
2540 pid = current->tgid;
2542 server = tlink_tcon(open_file->tlink)->ses->server;
2543 memcpy(&saved_from, from, sizeof(struct iov_iter));
2546 unsigned int wsize, credits;
2548 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2553 nr_pages = get_numpages(wsize, len, &cur_len);
2554 wdata = cifs_writedata_alloc(nr_pages,
2555 cifs_uncached_writev_complete);
2558 add_credits_and_wake_if(server, credits, 0);
2562 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2565 add_credits_and_wake_if(server, credits, 0);
2569 num_pages = nr_pages;
2570 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2572 for (i = 0; i < nr_pages; i++)
2573 put_page(wdata->pages[i]);
2575 add_credits_and_wake_if(server, credits, 0);
2580 * Bring nr_pages down to the number of pages we actually used,
2581 * and free any pages that we didn't use.
2583 for ( ; nr_pages > num_pages; nr_pages--)
2584 put_page(wdata->pages[nr_pages - 1]);
2586 wdata->sync_mode = WB_SYNC_ALL;
2587 wdata->nr_pages = nr_pages;
2588 wdata->offset = (__u64)offset;
2589 wdata->cfile = cifsFileInfo_get(open_file);
2591 wdata->bytes = cur_len;
2592 wdata->pagesz = PAGE_SIZE;
2593 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2594 wdata->credits = credits;
/* reopen a stale handle before submitting, if needed */
2596 if (!wdata->cfile->invalidHandle ||
2597 !(rc = cifs_reopen_file(wdata->cfile, false)))
2598 rc = server->ops->async_writev(wdata,
2599 cifs_uncached_writedata_release);
2601 add_credits_and_wake_if(server, wdata->credits, 0);
2602 kref_put(&wdata->refcount,
2603 cifs_uncached_writedata_release);
2604 if (rc == -EAGAIN) {
/* rewind the iterator to retry this same chunk */
2605 memcpy(from, &saved_from,
2606 sizeof(struct iov_iter));
2607 iov_iter_advance(from, offset - saved_offset);
2613 list_add_tail(&wdata->list, wdata_list);
/*
 * Uncached (O_DIRECT-style) write entry point.  Fans the user buffer out
 * into async write requests via cifs_write_from_iter, then waits for
 * each completion in offset order, resending -EAGAIN chunks.  On success
 * advances ki_pos by the total written and invalidates the page-cache
 * mapping so cached reads don't see stale data.
 */
2621 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2623 struct file *file = iocb->ki_filp;
2624 ssize_t total_written = 0;
2625 struct cifsFileInfo *open_file;
2626 struct cifs_tcon *tcon;
2627 struct cifs_sb_info *cifs_sb;
2628 struct cifs_writedata *wdata, *tmp;
2629 struct list_head wdata_list;
2630 struct iov_iter saved_from;
2634 * BB - optimize the way when signing is disabled. We can drop this
2635 * extra memory-to-memory copying and use iovec buffers for constructing
2639 rc = generic_write_checks(iocb, from);
2643 INIT_LIST_HEAD(&wdata_list);
2644 cifs_sb = CIFS_FILE_SB(file);
2645 open_file = file->private_data;
2646 tcon = tlink_tcon(open_file->tlink);
2648 if (!tcon->ses->server->ops->async_writev)
2651 memcpy(&saved_from, from, sizeof(struct iov_iter));
2653 rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2654 open_file, cifs_sb, &wdata_list);
2657 * If at least one write was successfully sent, then discard any rc
2658 * value from the later writes. If the other write succeeds, then
2659 * we'll end up returning whatever was written. If it fails, then
2660 * we'll get a new rc value from that.
2662 if (!list_empty(&wdata_list))
2666 * Wait for and collect replies for any successful sends in order of
2667 * increasing offset. Once an error is hit or we get a fatal signal
2668 * while waiting, then return without waiting for any more replies.
2671 list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2673 /* FIXME: freezable too? */
2674 rc = wait_for_completion_killable(&wdata->done);
2677 else if (wdata->result)
2680 total_written += wdata->bytes;
2682 /* resend call if it's a retryable error */
2683 if (rc == -EAGAIN) {
2684 struct list_head tmp_list;
2685 struct iov_iter tmp_from;
2687 INIT_LIST_HEAD(&tmp_list);
2688 list_del_init(&wdata->list);
/* re-seek the saved iterator to this chunk's offset */
2690 memcpy(&tmp_from, &saved_from,
2691 sizeof(struct iov_iter));
2692 iov_iter_advance(&tmp_from,
2693 wdata->offset - iocb->ki_pos);
2695 rc = cifs_write_from_iter(wdata->offset,
2696 wdata->bytes, &tmp_from,
2697 open_file, cifs_sb, &tmp_list);
2699 list_splice(&tmp_list, &wdata_list);
2701 kref_put(&wdata->refcount,
2702 cifs_uncached_writedata_release);
2706 list_del_init(&wdata->list);
2707 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2710 if (unlikely(!total_written))
2713 iocb->ki_pos += total_written;
/* drop any now-stale cached pages for this inode */
2714 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2715 cifs_stats_bytes_written(tcon, total_written);
2716 return total_written;
/*
 * Cached write path that must respect mandatory byte-range locks.
 * Holds cinode->lock_sem shared (so no brlock can be added concurrently)
 * and the inode mutex, refuses the write if a conflicting exclusive lock
 * covers the range, otherwise hands off to the generic cached write.
 * NOTE(review): listing is elided between some lines; confirm the error /
 * unlock ordering against the complete file.
 */
2720 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2722 struct file *file = iocb->ki_filp;
2723 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2724 struct inode *inode = file->f_mapping->host;
2725 struct cifsInodeInfo *cinode = CIFS_I(inode);
2726 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2730 * We need to hold the sem to be sure nobody modifies lock list
2731 * with a brlock that prevents writing.
/* lock_sem before i_mutex: matches the ordering used elsewhere in this file */
2733 down_read(&cinode->lock_sem);
2734 mutex_lock(&inode->i_mutex);
2736 rc = generic_write_checks(iocb, from);
/* only write if no exclusive-type lock held by another handle conflicts */
2740 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2741 server->vals->exclusive_lock_type, NULL,
2743 rc = __generic_file_write_iter(iocb, from);
2747 mutex_unlock(&inode->i_mutex);
/* sync the just-written range when the file requires it (O_SYNC etc.) */
2750 ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2754 up_read(&cinode->lock_sem);
/*
 * strictcache write entry point: picks the write strategy based on the
 * caching state (oplock/lease) of the inode.
 *  - write-caching granted: generic cached write (POSIX-capable unix
 *    mounts) or the brlock-aware cifs_writev().
 *  - otherwise: uncached write straight to the server, then zap any
 *    read-cached pages so stale data is never served.
 * Bracketed by cifs_get_writer()/cifs_put_writer() to coordinate with
 * oplock break handling.
 */
2759 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2761 struct inode *inode = file_inode(iocb->ki_filp);
2762 struct cifsInodeInfo *cinode = CIFS_I(inode);
2763 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2764 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2765 iocb->ki_filp->private_data;
2766 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2769 written = cifs_get_writer(cinode);
2773 if (CIFS_CACHE_WRITE(cinode)) {
/* POSIX byte-range lock semantics available and not disabled by mount */
2774 if (cap_unix(tcon->ses) &&
2775 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2776 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2777 written = generic_file_write_iter(iocb, from);
2780 written = cifs_writev(iocb, from);
2784 * For non-oplocked files in strict cache mode we need to write the data
2785 * to the server exactly from the pos to pos+len-1 rather than flush all
2786 * affected pages because it may cause a error with mandatory locks on
2787 * these pages but not on the region from pos to ppos+len-1.
2789 written = cifs_user_writev(iocb, from)
2790 if (CIFS_CACHE_READ(cinode)) {
2792 * We have read level caching and we have just sent a write
2793 * request to the server thus making data in the cache stale.
2794 * Zap the cache and set oplock/lease level to NONE to avoid
2795 * reading stale data from the cache. All subsequent read
2796 * operations will read new data from the server.
2798 cifs_zap_mapping(inode);
2799 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
2804 cifs_put_writer(cinode);
2808 static struct cifs_readdata *
2809 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2811 struct cifs_readdata *rdata;
2813 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2815 if (rdata != NULL) {
2816 kref_init(&rdata->refcount);
2817 INIT_LIST_HEAD(&rdata->list);
2818 init_completion(&rdata->done);
2819 INIT_WORK(&rdata->work, complete);
/*
 * kref release callback for a cifs_readdata: drops the file reference
 * taken when the read was issued.  The struct itself is freed in the
 * elided remainder of this function (not visible in this listing).
 */
2826 cifs_readdata_release(struct kref *refcount)
2828 struct cifs_readdata *rdata = container_of(refcount,
2829 struct cifs_readdata, refcount);
2832 cifsFileInfo_put(rdata->cfile);
/*
 * Populate rdata->pages[0..nr_pages-1] with freshly allocated highmem
 * pages.  On a mid-loop allocation failure, every page allocated so far
 * is released and its slot reset to NULL before returning an error.
 */
2838 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2844 for (i = 0; i < nr_pages; i++) {
2845 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2850 rdata->pages[i] = page;
/* unwind partial allocation on failure */
2854 unsigned int nr_page_failed = i;
2856 for (i = 0; i < nr_page_failed; i++) {
2857 put_page(rdata->pages[i]);
2858 rdata->pages[i] = NULL;
/*
 * kref release for the uncached (direct) read path: puts every page the
 * readdata still owns, then chains to cifs_readdata_release() to drop
 * the file reference and free the structure.
 */
2865 cifs_uncached_readdata_release(struct kref *refcount)
2867 struct cifs_readdata *rdata = container_of(refcount,
2868 struct cifs_readdata, refcount);
2871 for (i = 0; i < rdata->nr_pages; i++) {
2872 put_page(rdata->pages[i]);
2873 rdata->pages[i] = NULL;
2875 cifs_readdata_release(refcount);
2879 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2880 * @rdata: the readdata response with list of pages holding data
2881 * @iter: destination for our data
2883 * This function copies data from a list of pages in a readdata response into
2884 * an array of iovecs. It will first calculate where the data should go
2885 * based on the info in the readdata and then copy the data into that spot.
2888 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2890 size_t remaining = rdata->got_bytes;
2893 for (i = 0; i < rdata->nr_pages; i++) {
2894 struct page *page = rdata->pages[i];
2895 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2896 size_t written = copy_page_to_iter(page, 0, copy, iter);
2897 remaining -= written;
/* stop early only on a genuine copy failure, not iter exhaustion */
2898 if (written < copy && iov_iter_count(iter) > 0)
/* leftover bytes mean the user buffer could not take all the data */
2901 return remaining ? -EFAULT : 0;
/*
 * Work-queue completion for an uncached async read: wake the waiter in
 * cifs_user_readv() and drop the reference held for the in-flight I/O.
 */
2905 cifs_uncached_readv_complete(struct work_struct *work)
2907 struct cifs_readdata *rdata = container_of(work,
2908 struct cifs_readdata, work);
2910 complete(&rdata->done);
2911 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * Receive up to @len bytes of read response from the server socket into
 * rdata's pages, one page per socket read.  A final partial page is
 * zero-filled past the received data and recorded in rdata->tailsz;
 * pages wholly beyond the data are released immediately.
 * Returns bytes received, or the socket error if nothing was received
 * (a connection abort always reports the error).
 * NOTE(review): kmap()ed page addresses are used for the iovec; the
 * matching kunmap calls are in elided lines - confirm in the full file.
 */
2915 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2916 struct cifs_readdata *rdata, unsigned int len)
2920 unsigned int nr_pages = rdata->nr_pages;
2923 rdata->got_bytes = 0;
2924 rdata->tailsz = PAGE_SIZE;
2925 for (i = 0; i < nr_pages; i++) {
2926 struct page *page = rdata->pages[i];
2928 if (len >= PAGE_SIZE) {
2929 /* enough data to fill the page */
2930 iov.iov_base = kmap(page);
2931 iov.iov_len = PAGE_SIZE;
2932 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2933 i, iov.iov_base, iov.iov_len);
2935 } else if (len > 0) {
2936 /* enough for partial page, fill and zero the rest */
2937 iov.iov_base = kmap(page);
2939 cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2940 i, iov.iov_base, iov.iov_len);
2941 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2942 rdata->tailsz = len;
2945 /* no need to hold page hostage */
2946 rdata->pages[i] = NULL;
2952 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2957 rdata->got_bytes += result;
/* partial success wins over a late error, except on connection abort */
2960 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2961 rdata->got_bytes : result;
/*
 * Split an uncached read of @len bytes at @offset into rsize-bounded
 * async read requests and queue each resulting cifs_readdata on
 * @rdata_list.  For each chunk: reserve MTU credits, allocate the
 * readdata and its pages, fill in the I/O parameters, and dispatch via
 * the server's async_readv op (reopening an invalidated handle first).
 * On dispatch failure the credits are returned and the readdata
 * reference dropped.  The pid honours the RWPIDFORWARD mount flag.
 */
2965 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2966 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2968 struct cifs_readdata *rdata;
2969 unsigned int npages, rsize, credits;
2973 struct TCP_Server_Info *server;
2975 server = tlink_tcon(open_file->tlink)->ses->server;
2977 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2978 pid = open_file->pid;
2980 pid = current->tgid;
/* block until enough credits are available for an rsize-worth of read */
2983 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2988 cur_len = min_t(const size_t, len, rsize);
2989 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2991 /* allocate a readdata struct */
2992 rdata = cifs_readdata_alloc(npages,
2993 cifs_uncached_readv_complete);
/* allocation failed: give the reserved credits back before bailing */
2995 add_credits_and_wake_if(server, credits, 0);
3000 rc = cifs_read_allocate_pages(rdata, npages);
3004 rdata->cfile = cifsFileInfo_get(open_file);
3005 rdata->nr_pages = npages;
3006 rdata->offset = offset;
3007 rdata->bytes = cur_len;
3009 rdata->pagesz = PAGE_SIZE;
3010 rdata->read_into_pages = cifs_uncached_read_into_pages;
3011 rdata->credits = credits;
/* reopen a stale handle before issuing the async read */
3013 if (!rdata->cfile->invalidHandle ||
3014 !(rc = cifs_reopen_file(rdata->cfile, true)))
3015 rc = server->ops->async_readv(rdata);
3018 add_credits_and_wake_if(server, rdata->credits, 0);
3019 kref_put(&rdata->refcount,
3020 cifs_uncached_readdata_release);
3026 list_add_tail(&rdata->list, rdata_list);
/*
 * Uncached (direct) read entry point.  Fans the request out as async
 * reads via cifs_send_async_read(), then collects completions in order
 * of increasing offset, copying received pages into the user iov.
 * A request that failed with -EAGAIN (reconnect) is resent for the
 * portion not yet received; any partial data already received is copied
 * to the user buffer first.  Advances iocb->ki_pos by the total read.
 * NOTE(review): several lines are elided in this listing (error exits,
 * the short-read discard path) - verify against the complete file.
 */
3034 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3036 struct file *file = iocb->ki_filp;
3039 ssize_t total_read = 0;
3040 loff_t offset = iocb->ki_pos;
3041 struct cifs_sb_info *cifs_sb;
3042 struct cifs_tcon *tcon;
3043 struct cifsFileInfo *open_file;
3044 struct cifs_readdata *rdata, *tmp;
3045 struct list_head rdata_list;
3047 len = iov_iter_count(to);
3051 INIT_LIST_HEAD(&rdata_list);
3052 cifs_sb = CIFS_FILE_SB(file);
3053 open_file = file->private_data;
3054 tcon = tlink_tcon(open_file->tlink);
/* this path requires the dialect to support async reads */
3056 if (!tcon->ses->server->ops->async_readv)
3059 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3060 cifs_dbg(FYI, "attempting read on write only file instance\n");
3062 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3064 /* if at least one read request send succeeded, then reset rc */
3065 if (!list_empty(&rdata_list))
3068 len = iov_iter_count(to);
3069 /* the loop below should proceed in the order of increasing offsets */
3071 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3073 /* FIXME: freezable sleep too? */
3074 rc = wait_for_completion_killable(&rdata->done);
3077 else if (rdata->result == -EAGAIN) {
3078 /* resend call if it's a retryable error */
3079 struct list_head tmp_list;
3080 unsigned int got_bytes = rdata->got_bytes;
3082 list_del_init(&rdata->list);
3083 INIT_LIST_HEAD(&tmp_list);
3086 * Got a part of data and then reconnect has
3087 * happened -- fill the buffer and continue
3090 if (got_bytes && got_bytes < rdata->bytes) {
3091 rc = cifs_readdata_to_iov(rdata, to);
3093 kref_put(&rdata->refcount,
3094 cifs_uncached_readdata_release);
/* resend only the remainder that was never received */
3099 rc = cifs_send_async_read(
3100 rdata->offset + got_bytes,
3101 rdata->bytes - got_bytes,
3102 rdata->cfile, cifs_sb,
/* splice the resent requests back so they are collected in order */
3105 list_splice(&tmp_list, &rdata_list);
3107 kref_put(&rdata->refcount,
3108 cifs_uncached_readdata_release);
3110 } else if (rdata->result)
3113 rc = cifs_readdata_to_iov(rdata, to);
3115 /* if there was a short read -- discard anything left */
3116 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3119 list_del_init(&rdata->list);
3120 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3123 total_read = len - iov_iter_count(to);
3125 cifs_stats_bytes_read(tcon, total_read);
3127 /* mask nodata case */
3132 iocb->ki_pos += total_read;
/*
 * strictcache read entry point.  Without a read-caching oplock/lease the
 * page cache cannot be trusted, so read uncached from the server.  With
 * read caching: POSIX-capable unix mounts may use the generic cached
 * read directly; otherwise take lock_sem shared and only read through
 * the page cache if no shared-type brlock conflicts with the range.
 */
3139 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3141 struct inode *inode = file_inode(iocb->ki_filp);
3142 struct cifsInodeInfo *cinode = CIFS_I(inode);
3143 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3144 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3145 iocb->ki_filp->private_data;
3146 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3150 * In strict cache mode we need to read from the server all the time
3151 * if we don't have level II oplock because the server can delay mtime
3152 * change - so we can't make a decision about inode invalidating.
3153 * And we can also fail with pagereading if there are mandatory locks
3154 * on pages affected by this read but not on the region from pos to
3157 if (!CIFS_CACHE_READ(cinode))
3158 return cifs_user_readv(iocb, to);
3160 if (cap_unix(tcon->ses) &&
3161 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3162 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3163 return generic_file_read_iter(iocb, to);
3166 * We need to hold the sem to be sure nobody modifies lock list
3167 * with a brlock that prevents reading.
3169 down_read(&cinode->lock_sem);
3170 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3171 tcon->ses->server->vals->shared_lock_type,
3172 NULL, CIFS_READ_OP))
3173 rc = generic_file_read_iter(iocb, to);
3174 up_read(&cinode->lock_sem);
/*
 * Synchronous (blocking) read of @read_size bytes at *@offset into
 * @read_data, looping over rsize-bounded chunks through the server's
 * sync_read op.  Retries each chunk on -EAGAIN, reopening an invalid
 * handle when needed.  Updates *@offset and the tcon read statistics
 * as data arrives.  The pid honours the RWPIDFORWARD mount flag.
 * NOTE(review): xid acquisition/release and the final return are in
 * elided lines - verify error-path cleanup against the full file.
 */
3179 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3182 unsigned int bytes_read = 0;
3183 unsigned int total_read;
3184 unsigned int current_read_size;
3186 struct cifs_sb_info *cifs_sb;
3187 struct cifs_tcon *tcon;
3188 struct TCP_Server_Info *server;
3191 struct cifsFileInfo *open_file;
3192 struct cifs_io_parms io_parms;
3193 int buf_type = CIFS_NO_BUFFER;
3197 cifs_sb = CIFS_FILE_SB(file);
3199 /* FIXME: set up handlers for larger reads and/or convert to async */
3200 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3202 if (file->private_data == NULL) {
3207 open_file = file->private_data;
3208 tcon = tlink_tcon(open_file->tlink);
3209 server = tcon->ses->server;
3211 if (!server->ops->sync_read) {
3216 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3217 pid = open_file->pid;
3219 pid = current->tgid;
3221 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3222 cifs_dbg(FYI, "attempting read on write only file instance\n");
3224 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3225 total_read += bytes_read, cur_offset += bytes_read) {
3227 current_read_size = min_t(uint, read_size - total_read,
3230 * For windows me and 9x we do not want to request more
3231 * than it negotiated since it will refuse the read
3234 if (!(tcon->ses->capabilities &
3235 tcon->ses->server->vals->cap_large_files)) {
3236 current_read_size = min_t(uint,
3237 current_read_size, CIFSMaxBufSize);
3239 if (open_file->invalidHandle) {
3240 rc = cifs_reopen_file(open_file, true);
3245 io_parms.tcon = tcon;
3246 io_parms.offset = *offset;
3247 io_parms.length = current_read_size;
3248 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3249 &bytes_read, &cur_offset,
/* retry the same chunk for as long as the server says try again */
3251 } while (rc == -EAGAIN);
3253 if (rc || (bytes_read == 0)) {
3261 cifs_stats_bytes_read(tcon, total_read);
3262 *offset += bytes_read;
3270 * If the page is mmap'ed into a process' page tables, then we need to make
3271 * sure that it doesn't change while being written back.
3274 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3276 struct page *page = vmf->page;
/* return with the page locked; writeback cannot change it underneath us */
3279 return VM_FAULT_LOCKED;
/* VM operations shared by both cifs mmap variants: generic fault paths
 * plus the page-pinning mkwrite handler above. */
3282 static const struct vm_operations_struct cifs_file_vm_ops = {
3283 .fault = filemap_fault,
3284 .map_pages = filemap_map_pages,
3285 .page_mkwrite = cifs_page_mkwrite,
/*
 * mmap for strictcache mounts: if we hold no read-caching oplock/lease
 * the page cache may be stale, so zap the mapping before the generic
 * mmap, then install the cifs VM ops.
 */
3288 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3291 struct inode *inode = file_inode(file);
3295 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3296 rc = cifs_zap_mapping(inode);
3298 rc = generic_file_mmap(file, vma);
3300 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Default mmap: revalidate the file's cached metadata first (a failure
 * is only logged), then do the generic mmap and install the cifs VM ops.
 */
3306 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3312 rc = cifs_revalidate_file(file);
3314 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3317 rc = generic_file_mmap(file, vma);
3319 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Work-queue completion for a readpages (cached) read: for every page,
 * add it to the file LRU, mark it uptodate (and push it to fscache) if
 * the read succeeded or partially succeeded before an -EAGAIN, then
 * unlock/release it.  Finally drops the in-flight readdata reference.
 */
3326 cifs_readv_complete(struct work_struct *work)
3328 unsigned int i, got_bytes;
3329 struct cifs_readdata *rdata = container_of(work,
3330 struct cifs_readdata, work);
3332 got_bytes = rdata->got_bytes;
3333 for (i = 0; i < rdata->nr_pages; i++) {
3334 struct page *page = rdata->pages[i];
3336 lru_cache_add_file(page);
/* partial data received before a reconnect still counts as valid */
3338 if (rdata->result == 0 ||
3339 (rdata->result == -EAGAIN && got_bytes)) {
3340 flush_dcache_page(page);
3341 SetPageUptodate(page);
3346 if (rdata->result == 0 ||
3347 (rdata->result == -EAGAIN && got_bytes))
3348 cifs_readpage_to_fscache(rdata->mapping->host, page);
/* consume this page's share of the received byte count */
3350 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3352 page_cache_release(page);
3353 rdata->pages[i] = NULL;
3355 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * Receive read response data from the socket into readpages' pagecache
 * pages.  Pages fully covered by @len are filled whole; a final partial
 * page is zero-padded and recorded in rdata->tailsz.  Pages beyond the
 * server's (probable) EOF are zero-filled and marked uptodate so the
 * VFS stops re-requesting them; pages past the data but within EOF are
 * simply released.  Returns bytes received, or the socket error if none
 * were received (connection abort always reports the error).
 * NOTE(review): unlock/kunmap of filled pages happens in elided lines -
 * confirm against the complete file.
 */
3359 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3360 struct cifs_readdata *rdata, unsigned int len)
3366 unsigned int nr_pages = rdata->nr_pages;
3369 /* determine the eof that the server (probably) has */
3370 eof = CIFS_I(rdata->mapping->host)->server_eof;
3371 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3372 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3374 rdata->got_bytes = 0;
3375 rdata->tailsz = PAGE_CACHE_SIZE;
3376 for (i = 0; i < nr_pages; i++) {
3377 struct page *page = rdata->pages[i];
3379 if (len >= PAGE_CACHE_SIZE) {
3380 /* enough data to fill the page */
3381 iov.iov_base = kmap(page);
3382 iov.iov_len = PAGE_CACHE_SIZE;
3383 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3384 i, page->index, iov.iov_base, iov.iov_len);
3385 len -= PAGE_CACHE_SIZE;
3386 } else if (len > 0) {
3387 /* enough for partial page, fill and zero the rest */
3388 iov.iov_base = kmap(page);
3390 cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3391 i, page->index, iov.iov_base, iov.iov_len);
3392 memset(iov.iov_base + len,
3393 '\0', PAGE_CACHE_SIZE - len);
3394 rdata->tailsz = len;
3396 } else if (page->index > eof_index) {
3398 * The VFS will not try to do readahead past the
3399 * i_size, but it's possible that we have outstanding
3400 * writes with gaps in the middle and the i_size hasn't
3401 * caught up yet. Populate those with zeroed out pages
3402 * to prevent the VFS from repeatedly attempting to
3403 * fill them until the writes are flushed.
3405 zero_user(page, 0, PAGE_CACHE_SIZE);
3406 lru_cache_add_file(page);
3407 flush_dcache_page(page);
3408 SetPageUptodate(page);
3410 page_cache_release(page);
3411 rdata->pages[i] = NULL;
3415 /* no need to hold page hostage */
3416 lru_cache_add_file(page);
3418 page_cache_release(page);
3419 rdata->pages[i] = NULL;
3424 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3429 rdata->got_bytes += result;
/* partial success wins over a late error, except on connection abort */
3432 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3433 rdata->got_bytes : result;
/*
 * Peel a run of index-contiguous pages off the tail of @page_list into
 * @tmplist for one read request.  The first page is committed to the
 * page cache (giving up entirely if that fails); further pages are
 * added while they stay contiguous, fit within @rsize, and insert into
 * the page cache cleanly.  Outputs the starting file @offset, total
 * @bytes and @nr_pages of the batch.
 */
3437 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3438 unsigned int rsize, struct list_head *tmplist,
3439 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3441 struct page *page, *tpage;
3442 unsigned int expected_index;
3444 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
3446 INIT_LIST_HEAD(tmplist);
/* page_list is in declining index order; the last entry is the lowest */
3448 page = list_entry(page_list->prev, struct page, lru);
3451 * Lock the page and put it in the cache. Since no one else
3452 * should have access to this page, we're safe to simply set
3453 * PG_locked without checking it first.
3455 __set_page_locked(page);
3456 rc = add_to_page_cache_locked(page, mapping,
3459 /* give up if we can't stick it in the cache */
3461 __clear_page_locked(page);
3465 /* move first page to the tmplist */
3466 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3467 *bytes = PAGE_CACHE_SIZE;
3469 list_move_tail(&page->lru, tmplist);
3471 /* now try and add more pages onto the request */
3472 expected_index = page->index + 1;
3473 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3474 /* discontinuity ? */
3475 if (page->index != expected_index)
3478 /* would this page push the read over the rsize? */
3479 if (*bytes + PAGE_CACHE_SIZE > rsize)
3482 __set_page_locked(page);
3483 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3484 __clear_page_locked(page);
3487 list_move_tail(&page->lru, tmplist);
3488 (*bytes) += PAGE_CACHE_SIZE;
/*
 * address_space readpages: satisfy what we can from fscache first, then
 * repeatedly carve contiguous, rsize-bounded batches off the page list
 * (readpages_get_pages) and issue each as an async read.  Credits are
 * reserved per batch and returned on any failure; on dispatch failure
 * the pages are put back on the LRU and released so the VFS can fall
 * back to single-page readpage.  Any pages shown to fscache but never
 * submitted are uncached at the end.
 * NOTE(review): loop-exit conditions and some error branches sit in
 * elided lines - verify against the complete file.
 */
3495 static int cifs_readpages(struct file *file, struct address_space *mapping,
3496 struct list_head *page_list, unsigned num_pages)
3499 struct list_head tmplist;
3500 struct cifsFileInfo *open_file = file->private_data;
3501 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3502 struct TCP_Server_Info *server;
3506 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3507 * immediately if the cookie is negative
3509 * After this point, every page in the list might have PG_fscache set,
3510 * so we will need to clean that up off of every page we don't use.
3512 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3517 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3518 pid = open_file->pid;
3520 pid = current->tgid;
3523 server = tlink_tcon(open_file->tlink)->ses->server;
3525 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3526 __func__, file, mapping, num_pages);
3529 * Start with the page at end of list and move it to private
3530 * list. Do the same with any following pages until we hit
3531 * the rsize limit, hit an index discontinuity, or run out of
3532 * pages. Issue the async read and then start the loop again
3533 * until the list is empty.
3535 * Note that list order is important. The page_list is in
3536 * the order of declining indexes. When we put the pages in
3537 * the rdata->pages, then we want them in increasing order.
3539 while (!list_empty(page_list)) {
3540 unsigned int i, nr_pages, bytes, rsize;
3542 struct page *page, *tpage;
3543 struct cifs_readdata *rdata;
3546 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3552 * Give up immediately if rsize is too small to read an entire
3553 * page. The VFS will fall back to readpage. We should never
3554 * reach this point however since we set ra_pages to 0 when the
3555 * rsize is smaller than a cache page.
3557 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3558 add_credits_and_wake_if(server, credits, 0);
3562 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3563 &nr_pages, &offset, &bytes);
3565 add_credits_and_wake_if(server, credits, 0);
3569 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3571 /* best to give up if we're out of mem */
3572 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3573 list_del(&page->lru);
3574 lru_cache_add_file(page);
3576 page_cache_release(page);
3579 add_credits_and_wake_if(server, credits, 0);
3583 rdata->cfile = cifsFileInfo_get(open_file);
3584 rdata->mapping = mapping;
3585 rdata->offset = offset;
3586 rdata->bytes = bytes;
3588 rdata->pagesz = PAGE_CACHE_SIZE;
3589 rdata->read_into_pages = cifs_readpages_read_into_pages;
3590 rdata->credits = credits;
/* tmplist is in increasing index order; keep that order in rdata->pages */
3592 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3593 list_del(&page->lru);
3594 rdata->pages[rdata->nr_pages++] = page;
/* reopen a stale handle before issuing the async read */
3597 if (!rdata->cfile->invalidHandle ||
3598 !(rc = cifs_reopen_file(rdata->cfile, true)))
3599 rc = server->ops->async_readv(rdata);
3601 add_credits_and_wake_if(server, rdata->credits, 0);
3602 for (i = 0; i < rdata->nr_pages; i++) {
3603 page = rdata->pages[i];
3604 lru_cache_add_file(page);
3606 page_cache_release(page);
3608 /* Fallback to the readpage in error/reconnect cases */
3609 kref_put(&rdata->refcount, cifs_readdata_release);
3613 kref_put(&rdata->refcount, cifs_readdata_release);
3616 /* Any pages that have been shown to fscache but didn't get added to
3617 * the pagecache must be uncached before they get returned to the
3620 cifs_fscache_readpages_cancel(mapping->host, page_list);
3625 * cifs_readpage_worker must be called with the page pinned
/*
 * Fill one page: try fscache first, otherwise do a synchronous
 * cifs_read() of a full page, update atime, zero any tail past the
 * bytes actually read, mark the page uptodate and push it to fscache.
 */
3627 static int cifs_readpage_worker(struct file *file, struct page *page,
3633 /* Is the page cached? */
3634 rc = cifs_readpage_from_fscache(file_inode(file), page);
3638 read_data = kmap(page);
3639 /* for reads over a certain size could initiate async read ahead */
3641 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3646 cifs_dbg(FYI, "Bytes read %d\n", rc);
3648 file_inode(file)->i_atime =
3649 current_fs_time(file_inode(file)->i_sb);
/* short read: zero the remainder so no stale data leaks to userspace */
3651 if (PAGE_CACHE_SIZE > rc)
3652 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3654 flush_dcache_page(page);
3655 SetPageUptodate(page);
3657 /* send this page to the cache */
3658 cifs_readpage_to_fscache(file_inode(file), page);
/*
 * address_space readpage: compute the file offset from the page index,
 * sanity-check the open file, and delegate to cifs_readpage_worker().
 */
3670 static int cifs_readpage(struct file *file, struct page *page)
3672 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3678 if (file->private_data == NULL) {
3684 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3685 page, (int)offset, (int)offset);
3687 rc = cifs_readpage_worker(file, page, &offset);
/*
 * Return non-zero if any currently-open handle on this inode was opened
 * with write access.  Walks openFileList under the master tcon's
 * open_file_lock.
 */
3693 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3695 struct cifsFileInfo *open_file;
3696 struct cifs_tcon *tcon =
3697 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3699 spin_lock(&tcon->open_file_lock);
3700 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3701 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3702 spin_unlock(&tcon->open_file_lock);
3706 spin_unlock(&tcon->open_file_lock);
3710 /* We do not want to update the file size from server for inodes
3711 open for write - to avoid races with writepage extending
3712 the file - in the future we could consider allowing
3713 refreshing the inode only on increases in the file size
3714 but this is tricky to do without racing with writebehind
3715 page caching in the current Linux kernel design */
/*
 * Decide whether the cached i_size may be overwritten with the
 * server-reported @end_of_file.  Safe when the inode has no writers,
 * on directio mounts (no page cache to corrupt), or - per the visible
 * check - when the local size is smaller than the server's.
 */
3716 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3721 if (is_inode_writable(cifsInode)) {
3722 /* This inode is open for write at least once */
3723 struct cifs_sb_info *cifs_sb;
3725 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3726 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3727 /* since no page cache to corrupt on directio
3728 we can change size safely */
3732 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * address_space write_begin: grab/lock the target page and decide
 * whether it must first be read from the server.  The read is skipped
 * when the page is already uptodate, the write covers a full page, or
 * (with a read oplock) the page lies at/past EOF - in which case the
 * untouched parts are zeroed and PG_checked records that they are
 * conceptually up to date.  Otherwise, for readable opens, the page is
 * read synchronously once ("oncethru" guards a single retry).
 */
3740 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3741 loff_t pos, unsigned len, unsigned flags,
3742 struct page **pagep, void **fsdata)
3745 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3746 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3747 loff_t page_start = pos & PAGE_MASK;
3752 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3755 page = grab_cache_page_write_begin(mapping, index, flags);
3761 if (PageUptodate(page))
3765 * If we write a full page it will be up to date, no need to read from
3766 * the server. If the write is short, we'll end up doing a sync write
3769 if (len == PAGE_CACHE_SIZE)
3773 * optimize away the read when we have an oplock, and we're not
3774 * expecting to use any of the data we'd be reading in. That
3775 * is, when the page lies beyond the EOF, or straddles the EOF
3776 * and the write will cover all of the existing data.
3778 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3779 i_size = i_size_read(mapping->host);
3780 if (page_start >= i_size ||
3781 (offset == 0 && (pos + len) >= i_size)) {
3782 zero_user_segments(page, 0, offset,
3786 * PageChecked means that the parts of the page
3787 * to which we're not writing are considered up
3788 * to date. Once the data is copied to the
3789 * page, it can be set uptodate.
3791 SetPageChecked(page);
3796 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3798 * might as well read a page, it is fast enough. If we get
3799 * an error, we don't need to return it. cifs_write_end will
3800 * do a sync write instead since PG_uptodate isn't set.
3802 cifs_readpage_worker(file, page, &page_start);
3803 page_cache_release(page);
3807 /* we could try using another file handle if there is one -
3808 but how would we lock it to prevent close of that handle
3809 racing with this read? In any case
3810 this will be written out by write_end so is fine */
/*
 * address_space releasepage: refuse release while the page has private
 * data; otherwise let fscache decide whether the page can go.
 */
3817 static int cifs_release_page(struct page *page, gfp_t gfp)
3819 if (PagePrivate(page))
3822 return cifs_fscache_release_page(page, gfp);
/*
 * address_space invalidatepage: when the whole page is being
 * invalidated, also drop it from fscache so the caches stay coherent.
 */
3825 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3826 unsigned int length)
3828 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3830 if (offset == 0 && length == PAGE_CACHE_SIZE)
3831 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * address_space launder_page: synchronously write back a single dirty
 * page (WB_SYNC_ALL over exactly this page's byte range), then drop it
 * from fscache.
 */
3834 static int cifs_launder_page(struct page *page)
3837 loff_t range_start = page_offset(page);
3838 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3839 struct writeback_control wbc = {
3840 .sync_mode = WB_SYNC_ALL,
3842 .range_start = range_start,
3843 .range_end = range_end,
3846 cifs_dbg(FYI, "Launder page: %p\n", page);
3848 if (clear_page_dirty_for_io(page))
3849 rc = cifs_writepage_locked(page, &wbc);
3851 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * Work item run when the server breaks our oplock/lease.  Waits for
 * pending writers, downgrades the cached oplock state, flushes dirty
 * pages (break_lease for regular files), zaps the mapping when read
 * caching is lost, pushes cached byte-range locks to the server, and
 * finally acknowledges the break - unless it was cancelled because the
 * session reconnected and the old handle is gone.
 */
3855 void cifs_oplock_break(struct work_struct *work)
3857 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3859 struct inode *inode = d_inode(cfile->dentry);
3860 struct cifsInodeInfo *cinode = CIFS_I(inode);
3861 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3862 struct TCP_Server_Info *server = tcon->ses->server;
/* let in-flight writers drain before downgrading the oplock */
3865 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3866 TASK_UNINTERRUPTIBLE);
3868 server->ops->downgrade_oplock(server, cinode,
3869 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
/* mandatory brlocks make even read caching unsafe */
3871 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3872 cifs_has_mand_locks(cinode)) {
3873 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3878 if (inode && S_ISREG(inode->i_mode)) {
3879 if (CIFS_CACHE_READ(cinode))
3880 break_lease(inode, O_RDONLY);
3882 break_lease(inode, O_WRONLY);
3883 rc = filemap_fdatawrite(inode->i_mapping);
3884 if (!CIFS_CACHE_READ(cinode)) {
3885 rc = filemap_fdatawait(inode->i_mapping);
3886 mapping_set_error(inode->i_mapping, rc);
3887 cifs_zap_mapping(inode);
3889 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3892 rc = cifs_push_locks(cfile);
3894 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3897 * releasing stale oplock after recent reconnect of smb session using
3898 * a now incorrect file handle is not a data integrity issue but do
3899 * not bother sending an oplock release if session to server still is
3900 * disconnected since oplock already released by the server
3902 if (!cfile->oplock_break_cancelled) {
3903 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3905 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3907 cifs_done_oplock_break(cinode);
3911 * The presence of cifs_direct_io() in the address space ops vector
3912 * allowes open() O_DIRECT flags which would have failed otherwise.
3914 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3915 * so this method should never be called.
3917 * Direct IO is not yet supported in the cached mode.
/* Stub: exists only so O_DIRECT opens succeed; returns an error if hit. */
3920 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
3924 * Eventually need to support direct IO for non forcedirectio mounts
/*
 * Full address_space operations, used when the server buffer is large
 * enough for multi-page reads (includes .readpages; compare the
 * smallbuf variant that follows).
 */
3930 const struct address_space_operations cifs_addr_ops = {
3931 .readpage = cifs_readpage,
3932 .readpages = cifs_readpages,
3933 .writepage = cifs_writepage,
3934 .writepages = cifs_writepages,
3935 .write_begin = cifs_write_begin,
3936 .write_end = cifs_write_end,
3937 .set_page_dirty = __set_page_dirty_nobuffers,
3938 .releasepage = cifs_release_page,
3939 .direct_IO = cifs_direct_io,
3940 .invalidatepage = cifs_invalidate_page,
3941 .launder_page = cifs_launder_page,
3945 * cifs_readpages requires the server to support a buffer large enough to
3946 * contain the header plus one complete page of data. Otherwise, we need
3947 * to leave cifs_readpages out of the address space operations.
3949 const struct address_space_operations cifs_addr_ops_smallbuf = {
3950 .readpage = cifs_readpage,
3951 .writepage = cifs_writepage,
3952 .writepages = cifs_writepages,
3953 .write_begin = cifs_write_begin,
3954 .write_end = cifs_write_end,
3955 .set_page_dirty = __set_page_dirty_nobuffers,
3956 .releasepage = cifs_release_page,
3957 .invalidatepage = cifs_invalidate_page,
3958 .launder_page = cifs_launder_page,