4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
47 static inline int cifs_convert_flags(unsigned int flags)
49 if ((flags & O_ACCMODE) == O_RDONLY)
51 else if ((flags & O_ACCMODE) == O_WRONLY)
53 else if ((flags & O_ACCMODE) == O_RDWR) {
54 /* GENERIC_ALL is too much permission to request
55 can cause unnecessary access denied on create */
56 /* return GENERIC_ALL; */
57 return (GENERIC_READ | GENERIC_WRITE);
60 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
65 static u32 cifs_posix_convert_flags(unsigned int flags)
69 if ((flags & O_ACCMODE) == O_RDONLY)
70 posix_flags = SMB_O_RDONLY;
71 else if ((flags & O_ACCMODE) == O_WRONLY)
72 posix_flags = SMB_O_WRONLY;
73 else if ((flags & O_ACCMODE) == O_RDWR)
74 posix_flags = SMB_O_RDWR;
76 if (flags & O_CREAT) {
77 posix_flags |= SMB_O_CREAT;
79 posix_flags |= SMB_O_EXCL;
80 } else if (flags & O_EXCL)
81 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82 current->comm, current->tgid);
85 posix_flags |= SMB_O_TRUNC;
86 /* be safe and imply O_SYNC for O_DSYNC */
88 posix_flags |= SMB_O_SYNC;
89 if (flags & O_DIRECTORY)
90 posix_flags |= SMB_O_DIRECTORY;
91 if (flags & O_NOFOLLOW)
92 posix_flags |= SMB_O_NOFOLLOW;
94 posix_flags |= SMB_O_DIRECT;
99 static inline int cifs_get_disposition(unsigned int flags)
101 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104 return FILE_OVERWRITE_IF;
105 else if ((flags & O_CREAT) == O_CREAT)
107 else if ((flags & O_TRUNC) == O_TRUNC)
108 return FILE_OVERWRITE;
/*
 * cifs_posix_open - open/create @full_path using the SMB unix-extensions
 * POSIX create call (CIFSPOSIXCreate) instead of an NT-style open.
 * On success the oplock is returned in *poplock and the handle in *pnetfid.
 * If *pinode is NULL a new inode is instantiated from the returned
 * FILE_UNIX_BASIC_INFO; otherwise the existing inode is refreshed.
 *
 * NOTE(review): line-numbered fragment; several original lines (error
 * paths, braces, returns) are missing from this view — do not assume the
 * visible control flow is complete.
 */
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct super_block *sb, int mode, unsigned int f_flags,
115 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
118 FILE_UNIX_BASIC_INFO *presp_data;
119 __u32 posix_flags = 0;
120 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121 struct cifs_fattr fattr;
122 struct tcon_link *tlink;
123 struct cifs_tcon *tcon;
125 cifs_dbg(FYI, "posix open %s\n", full_path);
/* response buffer the server fills with FILE_UNIX_BASIC_INFO */
127 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128 if (presp_data == NULL)
131 tlink = cifs_sb_tlink(cifs_sb);
137 tcon = tlink_tcon(tlink);
/* apply the caller's umask before sending the create mode */
138 mode &= ~current_umask();
140 posix_flags = cifs_posix_convert_flags(f_flags);
141 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142 poplock, full_path, cifs_sb->local_nls,
143 cifs_remap(cifs_sb));
144 cifs_put_tlink(tlink);
/* Type == -1: open succeeded but no info returned; caller does qpathinfo */
149 if (presp_data->Type == cpu_to_le32(-1))
150 goto posix_open_ret; /* open ok, caller does qpathinfo */
153 goto posix_open_ret; /* caller does not need info */
155 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157 /* get new inode and set it up */
158 if (*pinode == NULL) {
159 cifs_fill_uniqueid(sb, &fattr);
160 *pinode = cifs_iget(sb, &fattr);
/* existing inode: drop any stale cached pages, then apply attributes */
166 cifs_revalidate_mapping(*pinode);
167 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * cifs_nt_open - open @full_path with an NT-style (non-POSIX) SMB open via
 * server->ops->open.  Translates f_flags into desired access, create
 * disposition and create options, then refreshes inode info from the
 * returned FILE_ALL_INFO (or via a unix qpathinfo on unix_ext mounts).
 *
 * NOTE(review): line-numbered fragment; declarations of rc/desired_access/
 * disposition/buf, error checks and the function's return are on lines
 * missing from this view.
 */
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178 struct cifs_fid *fid, unsigned int xid)
183 int create_options = CREATE_NOT_DIR;
185 struct TCP_Server_Info *server = tcon->ses->server;
186 struct cifs_open_parms oparms;
/* dialect-specific open op is mandatory for this path */
188 if (!server->ops->open)
191 desired_access = cifs_convert_flags(f_flags);
193 /*********************************************************************
194 * open flag mapping table:
196 * POSIX Flag CIFS Disposition
197 * ---------- ----------------
198 * O_CREAT FILE_OPEN_IF
199 * O_CREAT | O_EXCL FILE_CREATE
200 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
201 * O_TRUNC FILE_OVERWRITE
202 * none of the above FILE_OPEN
204 * Note that there is not a direct match between disposition
205 * FILE_SUPERSEDE (ie create whether or not file exists although
206 * O_CREAT | O_TRUNC is similar but truncates the existing
207 * file rather than creating a new file as FILE_SUPERSEDE does
208 * (which uses the attributes / metadata passed in on open call)
210 *? O_SYNC is a reasonable match to CIFS writethrough flag
211 *? and the read write flags match reasonably. O_LARGEFILE
212 *? is irrelevant because largefile support is always used
213 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215 *********************************************************************/
217 disposition = cifs_get_disposition(f_flags);
219 /* BB pass O_SYNC flag through on file attributes .. BB */
221 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
225 if (backup_cred(cifs_sb))
226 create_options |= CREATE_OPEN_BACKUP_INTENT;
228 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
229 if (f_flags & O_SYNC)
230 create_options |= CREATE_WRITE_THROUGH;
232 if (f_flags & O_DIRECT)
233 create_options |= CREATE_NO_BUFFER;
/* fill the open parms consumed by the dialect's ->open */
236 oparms.cifs_sb = cifs_sb;
237 oparms.desired_access = desired_access;
238 oparms.create_options = create_options;
239 oparms.disposition = disposition;
240 oparms.path = full_path;
242 oparms.reconnect = false;
244 rc = server->ops->open(xid, &oparms, oplock, buf);
/* refresh inode metadata; unix_ext mounts use the unix variant */
250 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
253 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/* on failure after open, close the just-opened handle */
257 server->ops->close(xid, tcon, fid);
268 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
270 struct cifs_fid_locks *cur;
271 bool has_locks = false;
273 down_read(&cinode->lock_sem);
274 list_for_each_entry(cur, &cinode->llist, llist) {
275 if (!list_empty(&cur->locks)) {
280 up_read(&cinode->lock_sem);
/*
 * Acquire @sem for writing by polling down_write_trylock with a short
 * sleep between attempts, instead of blocking in down_write().  This
 * avoids a lock_sem ordering deadlock between lock and write paths
 * (writers never queue behind readers here).
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}
/*
 * cifs_new_fileinfo - allocate and initialize the per-open cifsFileInfo for
 * @file, attach its per-fid lock list to the inode, publish it on the tcon
 * and inode open-file lists, and record the negotiated oplock/lease state.
 *
 * NOTE(review): line-numbered fragment; allocation-failure cleanup paths,
 * braces and the final return are on lines missing from this view.
 */
291 struct cifsFileInfo *
292 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
293 struct tcon_link *tlink, __u32 oplock)
295 struct dentry *dentry = file_dentry(file);
296 struct inode *inode = d_inode(dentry);
297 struct cifsInodeInfo *cinode = CIFS_I(inode);
298 struct cifsFileInfo *cfile;
299 struct cifs_fid_locks *fdlocks;
300 struct cifs_tcon *tcon = tlink_tcon(tlink);
301 struct TCP_Server_Info *server = tcon->ses->server;
303 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
307 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
/* per-fid byte-range lock container, cross-linked with cfile */
313 INIT_LIST_HEAD(&fdlocks->locks);
314 fdlocks->cfile = cfile;
315 cfile->llist = fdlocks;
318 cfile->pid = current->tgid;
319 cfile->uid = current_fsuid();
320 cfile->dentry = dget(dentry);
321 cfile->f_flags = file->f_flags;
322 cfile->invalidHandle = false;
323 cfile->tlink = cifs_get_tlink(tlink);
324 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
325 mutex_init(&cfile->fh_mutex);
326 spin_lock_init(&cfile->file_info_lock);
/* pin the superblock while this handle is open */
328 cifs_sb_active(inode->i_sb);
331 * If the server returned a read oplock and we have mandatory brlocks,
332 * set oplock level to None.
334 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
335 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
339 cifs_down_write(&cinode->lock_sem);
340 list_add(&fdlocks->llist, &cinode->llist);
341 up_write(&cinode->lock_sem);
/* a lease break may have raced the open; prefer the pending oplock */
343 spin_lock(&tcon->open_file_lock);
344 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
345 oplock = fid->pending_open->oplock;
346 list_del(&fid->pending_open->olist);
348 fid->purge_cache = false;
349 server->ops->set_fid(cfile, fid, oplock);
351 list_add(&cfile->tlist, &tcon->openFileList);
353 /* if readable file instance put first in list*/
354 if (file->f_mode & FMODE_READ)
355 list_add(&cfile->flist, &cinode->openFileList);
357 list_add_tail(&cfile->flist, &cinode->openFileList);
358 spin_unlock(&tcon->open_file_lock);
360 if (fid->purge_cache)
361 cifs_zap_mapping(inode);
363 file->private_data = cfile;
367 struct cifsFileInfo *
368 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
370 spin_lock(&cifs_file->file_info_lock);
371 cifsFileInfo_get_locked(cifs_file);
372 spin_unlock(&cifs_file->file_info_lock);
377 * cifsFileInfo_put - release a reference of file priv data
379 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
381 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
383 _cifsFileInfo_put(cifs_file, true);
387 * _cifsFileInfo_put - release a reference of file priv data
389 * This may involve closing the filehandle @cifs_file out on the
390 * server. Must be called without holding tcon->open_file_lock and
391 * cifs_file->file_info_lock.
393 * If @wait_for_oplock_handler is true and we are releasing the last
394 * reference, wait for any running oplock break handler of the file
395 * and cancel any pending one. If calling this function from the
396 * oplock break handler, you need to pass false.
/*
 * NOTE(review): line-numbered fragment; the declarations of fid/xid, some
 * returns/braces and the free of cifs_file itself are on lines missing
 * from this view.
 */
399 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
401 struct inode *inode = d_inode(cifs_file->dentry);
402 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
403 struct TCP_Server_Info *server = tcon->ses->server;
404 struct cifsInodeInfo *cifsi = CIFS_I(inode);
405 struct super_block *sb = inode->i_sb;
406 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
407 struct cifsLockInfo *li, *tmp;
409 struct cifs_pending_open open;
410 bool oplock_break_cancelled;
412 spin_lock(&tcon->open_file_lock);
/* drop one reference; bail early if others remain */
414 spin_lock(&cifs_file->file_info_lock);
415 if (--cifs_file->count > 0) {
416 spin_unlock(&cifs_file->file_info_lock);
417 spin_unlock(&tcon->open_file_lock);
420 spin_unlock(&cifs_file->file_info_lock);
422 if (server->ops->get_lease_key)
423 server->ops->get_lease_key(inode, &fid);
425 /* store open in pending opens to make sure we don't miss lease break */
426 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
428 /* remove it from the lists */
429 list_del(&cifs_file->flist);
430 list_del(&cifs_file->tlist);
432 if (list_empty(&cifsi->openFileList)) {
433 cifs_dbg(FYI, "closing last open instance for inode %p\n",
434 d_inode(cifs_file->dentry));
436 * In strict cache mode we need invalidate mapping on the last
437 * close because it may cause a error when we open this file
438 * again and get at least level II oplock.
440 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
441 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
442 cifs_set_oplock_level(cifsi, 0);
445 spin_unlock(&tcon->open_file_lock);
/* must not cancel the oplock worker from within the worker itself */
447 oplock_break_cancelled = wait_oplock_handler ?
448 cancel_work_sync(&cifs_file->oplock_break) : false;
450 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
451 struct TCP_Server_Info *server = tcon->ses->server;
455 if (server->ops->close)
456 server->ops->close(xid, tcon, &cifs_file->fid);
460 if (oplock_break_cancelled)
461 cifs_done_oplock_break(cifsi);
463 cifs_del_pending_open(&open);
466 * Delete any outstanding lock records. We'll lose them when the file
469 cifs_down_write(&cifsi->lock_sem);
470 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
471 list_del(&li->llist);
472 cifs_del_lock_waiters(li);
475 list_del(&cifs_file->llist->llist);
476 kfree(cifs_file->llist);
477 up_write(&cifsi->lock_sem);
479 cifs_put_tlink(cifs_file->tlink);
480 dput(cifs_file->dentry);
481 cifs_sb_deactive(sb);
/*
 * cifs_open - VFS ->open for regular files.  Tries a POSIX open when the
 * server advertises the unix extensions, otherwise falls back to an
 * NT-style open via cifs_nt_open(), then wires up the cifsFileInfo.
 *
 * NOTE(review): line-numbered fragment; declarations of rc/xid/oplock/fid,
 * several error branches and the final free_xid/return are on lines
 * missing from this view.
 */
485 int cifs_open(struct inode *inode, struct file *file)
491 struct cifs_sb_info *cifs_sb;
492 struct TCP_Server_Info *server;
493 struct cifs_tcon *tcon;
494 struct tcon_link *tlink;
495 struct cifsFileInfo *cfile = NULL;
496 char *full_path = NULL;
497 bool posix_open_ok = false;
499 struct cifs_pending_open open;
503 cifs_sb = CIFS_SB(inode->i_sb);
504 tlink = cifs_sb_tlink(cifs_sb);
507 return PTR_ERR(tlink);
509 tcon = tlink_tcon(tlink);
510 server = tcon->ses->server;
512 full_path = build_path_from_dentry(file_dentry(file));
513 if (full_path == NULL) {
518 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
519 inode, file->f_flags, full_path);
/* strict-IO O_DIRECT opens switch to the direct (uncached) file ops */
521 if (file->f_flags & O_DIRECT &&
522 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
523 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
524 file->f_op = &cifs_file_direct_nobrl_ops;
526 file->f_op = &cifs_file_direct_ops;
/* prefer POSIX open when the server supports the unix path ops */
534 if (!tcon->broken_posix_open && tcon->unix_ext &&
535 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
536 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
537 /* can not refresh inode info since size could be stale */
538 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
539 cifs_sb->mnt_file_mode /* ignored */,
540 file->f_flags, &oplock, &fid.netfid, xid);
542 cifs_dbg(FYI, "posix open succeeded\n");
543 posix_open_ok = true;
544 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
545 if (tcon->ses->serverNOS)
546 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
547 tcon->ses->serverName,
548 tcon->ses->serverNOS);
/* remember the server mishandles posix open; don't retry it */
549 tcon->broken_posix_open = true;
550 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
551 (rc != -EOPNOTSUPP)) /* path not found or net err */
554 * Else fallthrough to retry open the old way on network i/o
559 if (server->ops->get_lease_key)
560 server->ops->get_lease_key(inode, &fid);
/* record pending open so a racing lease break is not missed */
562 cifs_add_pending_open(&fid, tlink, &open);
564 if (!posix_open_ok) {
565 if (server->ops->get_lease_key)
566 server->ops->get_lease_key(inode, &fid);
568 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
569 file->f_flags, &oplock, &fid, xid);
571 cifs_del_pending_open(&open);
576 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
/* fileinfo allocation failed: close server handle and unwind */
578 if (server->ops->close)
579 server->ops->close(xid, tcon, &fid);
580 cifs_del_pending_open(&open);
585 cifs_fscache_set_inode_cookie(inode, file);
587 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
589 * Time to set mode which we can not set earlier due to
590 * problems creating new read-only files.
592 struct cifs_unix_set_info_args args = {
593 .mode = inode->i_mode,
594 .uid = INVALID_UID, /* no change */
595 .gid = INVALID_GID, /* no change */
596 .ctime = NO_CHANGE_64,
597 .atime = NO_CHANGE_64,
598 .mtime = NO_CHANGE_64,
601 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
608 cifs_put_tlink(tlink);
612 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
615 * Try to reacquire byte range locks that were released when session
616 * to server was lost.
619 cifs_relock_file(struct cifsFileInfo *cfile)
621 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
622 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
623 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
626 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
627 if (cinode->can_cache_brlcks) {
628 /* can cache locks - no need to relock */
629 up_read(&cinode->lock_sem);
633 if (cap_unix(tcon->ses) &&
634 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
635 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
636 rc = cifs_push_posix_locks(cfile);
638 rc = tcon->ses->server->ops->push_mand_locks(cfile);
640 up_read(&cinode->lock_sem);
/*
 * cifs_reopen_file - re-open a file whose server handle was invalidated
 * (e.g. after reconnect).  Serialized per-file by fh_mutex.  Tries a POSIX
 * reopen on unix-extension mounts, else an NT-style ->open with the same
 * access/options as the original open; optionally flushes and refreshes
 * inode info (@can_flush), then restores oplock state and relocks.
 *
 * NOTE(review): line-numbered fragment; declarations of rc/xid/oplock/
 * desired_access/inode, several gotos/labels and the return are on lines
 * missing from this view.
 */
645 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
650 struct cifs_sb_info *cifs_sb;
651 struct cifs_tcon *tcon;
652 struct TCP_Server_Info *server;
653 struct cifsInodeInfo *cinode;
655 char *full_path = NULL;
657 int disposition = FILE_OPEN;
658 int create_options = CREATE_NOT_DIR;
659 struct cifs_open_parms oparms;
662 mutex_lock(&cfile->fh_mutex);
/* another thread already reopened the handle; nothing to do */
663 if (!cfile->invalidHandle) {
664 mutex_unlock(&cfile->fh_mutex);
670 inode = d_inode(cfile->dentry);
671 cifs_sb = CIFS_SB(inode->i_sb);
672 tcon = tlink_tcon(cfile->tlink);
673 server = tcon->ses->server;
676 * Can not grab rename sem here because various ops, including those
677 * that already have the rename sem can end up causing writepage to get
678 * called and if the server was down that means we end up here, and we
679 * can never tell if the caller already has the rename_sem.
681 full_path = build_path_from_dentry(cfile->dentry);
682 if (full_path == NULL) {
684 mutex_unlock(&cfile->fh_mutex);
689 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
690 inode, cfile->f_flags, full_path);
692 if (tcon->ses->server->oplocks)
697 if (tcon->unix_ext && cap_unix(tcon->ses) &&
698 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
699 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
701 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
702 * original open. Must mask them off for a reopen.
704 unsigned int oflags = cfile->f_flags &
705 ~(O_CREAT | O_EXCL | O_TRUNC);
707 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
708 cifs_sb->mnt_file_mode /* ignored */,
709 oflags, &oplock, &cfile->fid.netfid, xid);
711 cifs_dbg(FYI, "posix reopen succeeded\n");
712 oparms.reconnect = true;
716 * fallthrough to retry open the old way on errors, especially
717 * in the reconnect path it is important to retry hard
721 desired_access = cifs_convert_flags(cfile->f_flags);
723 if (backup_cred(cifs_sb))
724 create_options |= CREATE_OPEN_BACKUP_INTENT;
726 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
727 if (cfile->f_flags & O_SYNC)
728 create_options |= CREATE_WRITE_THROUGH;
730 if (cfile->f_flags & O_DIRECT)
731 create_options |= CREATE_NO_BUFFER;
733 if (server->ops->get_lease_key)
734 server->ops->get_lease_key(inode, &cfile->fid);
737 oparms.cifs_sb = cifs_sb;
738 oparms.desired_access = desired_access;
739 oparms.create_options = create_options;
740 oparms.disposition = disposition;
741 oparms.path = full_path;
742 oparms.fid = &cfile->fid;
743 oparms.reconnect = true;
746 * Can not refresh inode by passing in file_info buf to be returned by
747 * ops->open and then calling get_inode_info with returned buf since
748 * file might have write behind data that needs to be flushed and server
749 * version of file size can be stale. If we knew for sure that inode was
750 * not dirty locally we could do this.
752 rc = server->ops->open(xid, &oparms, &oplock, NULL);
753 if (rc == -ENOENT && oparms.reconnect == false) {
754 /* durable handle timeout is expired - open the file again */
755 rc = server->ops->open(xid, &oparms, &oplock, NULL);
756 /* indicate that we need to relock the file */
757 oparms.reconnect = true;
761 mutex_unlock(&cfile->fh_mutex);
762 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
763 cifs_dbg(FYI, "oplock: %d\n", oplock);
764 goto reopen_error_exit;
/* handle is valid again; safe to drop the mutex before flushing */
768 cfile->invalidHandle = false;
769 mutex_unlock(&cfile->fh_mutex);
770 cinode = CIFS_I(inode);
773 rc = filemap_write_and_wait(inode->i_mapping);
774 mapping_set_error(inode->i_mapping, rc);
777 rc = cifs_get_inode_info_unix(&inode, full_path,
780 rc = cifs_get_inode_info(&inode, full_path, NULL,
781 inode->i_sb, xid, NULL);
784 * Else we are writing out data to server already and could deadlock if
785 * we tried to flush data, and since we do not know if we have data that
786 * would invalidate the current end of file on the server we can not go
787 * to the server to get the new inode info.
791 * If the server returned a read oplock and we have mandatory brlocks,
792 * set oplock level to None.
794 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
795 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
799 server->ops->set_fid(cfile, &cfile->fid, oplock);
800 if (oparms.reconnect)
801 cifs_relock_file(cfile);
809 int cifs_close(struct inode *inode, struct file *file)
811 if (file->private_data != NULL) {
812 cifsFileInfo_put(file->private_data);
813 file->private_data = NULL;
816 /* return code from the ->release op is always ignored */
/*
 * cifs_reopen_persistent_handles - after reconnect, walk the tcon's open
 * files and reopen every invalidated handle.  Candidates are collected
 * (with an extra reference) under open_file_lock onto a private list, then
 * reopened outside the lock; any failure re-arms need_reopen_files.
 *
 * NOTE(review): line-numbered fragment; the return type line, early return,
 * and some braces are missing from this view.
 */
821 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
823 struct cifsFileInfo *open_file;
824 struct list_head *tmp;
825 struct list_head *tmp1;
826 struct list_head tmp_list;
828 if (!tcon->use_persistent || !tcon->need_reopen_files)
/* clear before the scan; set again below if any reopen fails */
831 tcon->need_reopen_files = false;
833 cifs_dbg(FYI, "Reopen persistent handles");
834 INIT_LIST_HEAD(&tmp_list);
836 /* list all files open on tree connection, reopen resilient handles */
837 spin_lock(&tcon->open_file_lock);
838 list_for_each(tmp, &tcon->openFileList) {
839 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
840 if (!open_file->invalidHandle)
/* hold a ref so the entry outlives the lock drop */
842 cifsFileInfo_get(open_file);
843 list_add_tail(&open_file->rlist, &tmp_list);
845 spin_unlock(&tcon->open_file_lock);
847 list_for_each_safe(tmp, tmp1, &tmp_list) {
848 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
849 if (cifs_reopen_file(open_file, false /* do not flush */))
850 tcon->need_reopen_files = true;
851 list_del_init(&open_file->rlist);
852 cifsFileInfo_put(open_file);
/*
 * cifs_closedir - VFS ->release for directories: close the server-side
 * search/dir handle if still needed, free any buffered search response,
 * and release the private cifsFileInfo.
 *
 * NOTE(review): line-numbered fragment; declarations of rc/xid/buf, NULL
 * checks and the free_xid/return are on lines missing from this view.
 */
856 int cifs_closedir(struct inode *inode, struct file *file)
860 struct cifsFileInfo *cfile = file->private_data;
861 struct cifs_tcon *tcon;
862 struct TCP_Server_Info *server;
865 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
871 tcon = tlink_tcon(cfile->tlink);
872 server = tcon->ses->server;
874 cifs_dbg(FYI, "Freeing private data in close dir\n");
875 spin_lock(&cfile->file_info_lock);
876 if (server->ops->dir_needs_close(cfile)) {
/* mark invalid before dropping the lock, then close on the server */
877 cfile->invalidHandle = true;
878 spin_unlock(&cfile->file_info_lock);
879 if (server->ops->close_dir)
880 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
883 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
884 /* not much we can do if it fails anyway, ignore rc */
887 spin_unlock(&cfile->file_info_lock);
/* free the network buffer holding the last search response, if any */
889 buf = cfile->srch_inf.ntwrk_buf_start;
891 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
892 cfile->srch_inf.ntwrk_buf_start = NULL;
893 if (cfile->srch_inf.smallBuf)
894 cifs_small_buf_release(buf);
896 cifs_buf_release(buf);
899 cifs_put_tlink(cfile->tlink);
900 kfree(file->private_data);
901 file->private_data = NULL;
902 /* BB can we lock the filestruct while this is going on? */
907 static struct cifsLockInfo *
908 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
910 struct cifsLockInfo *lock =
911 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
914 lock->offset = offset;
915 lock->length = length;
917 lock->pid = current->tgid;
918 INIT_LIST_HEAD(&lock->blist);
919 init_waitqueue_head(&lock->block_q);
924 cifs_del_lock_waiters(struct cifsLockInfo *lock)
926 struct cifsLockInfo *li, *tmp;
927 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
928 list_del_init(&li->blist);
929 wake_up(&li->block_q);
/* Operation kinds used by the lock-conflict checks below. */
933 #define CIFS_LOCK_OP 0
934 #define CIFS_READ_OP 1
935 #define CIFS_WRITE_OP 2
937 /* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's cached lock list for a lock overlapping
 * [@offset, @offset + @length) that conflicts with the requested @type /
 * @rw_check from @cfile.
 *
 * NOTE(review): line-numbered fragment; the return-type line, `continue`s,
 * *conf_lock assignment and return statements are missing from this view.
 */
939 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
940 __u64 length, __u8 type, struct cifsFileInfo *cfile,
941 struct cifsLockInfo **conf_lock, int rw_check)
943 struct cifsLockInfo *li;
944 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
945 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
947 list_for_each_entry(li, &fdlocks->locks, llist) {
/* skip locks that do not overlap the requested range */
948 if (offset + length <= li->offset ||
949 offset >= li->offset + li->length)
951 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
952 server->ops->compare_fids(cfile, cur_cfile)) {
953 /* shared lock prevents write op through the same fid */
954 if (!(li->type & server->vals->shared_lock_type) ||
955 rw_check != CIFS_WRITE_OP)
958 if ((type & server->vals->shared_lock_type) &&
959 ((server->ops->compare_fids(cfile, cur_cfile) &&
960 current->tgid == li->pid) || type == li->type))
970 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
971 __u8 type, struct cifsLockInfo **conf_lock,
975 struct cifs_fid_locks *cur;
976 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
978 list_for_each_entry(cur, &cinode->llist, llist) {
979 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
980 cfile, conf_lock, rw_check);
989 * Check if there is another lock that prevents us to set the lock (mandatory
990 * style). If such a lock exists, update the flock structure with its
991 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
992 * or leave it the same if we can't. Returns 0 if we don't need to request to
993 * the server or 1 otherwise.
/*
 * NOTE(review): line-numbered fragment; the return-type line, rc/exist
 * declarations, the if (exist) test, else branches and the return are
 * missing from this view.
 */
996 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
997 __u8 type, struct file_lock *flock)
1000 struct cifsLockInfo *conf_lock;
1001 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1002 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1005 down_read(&cinode->lock_sem);
1007 exist = cifs_find_lock_conflict(cfile, offset, length, type,
1008 &conf_lock, CIFS_LOCK_OP);
/* report the conflicting lock back through @flock */
1010 flock->fl_start = conf_lock->offset;
1011 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1012 flock->fl_pid = conf_lock->pid;
1013 if (conf_lock->type & server->vals->shared_lock_type)
1014 flock->fl_type = F_RDLCK;
1016 flock->fl_type = F_WRLCK;
1017 } else if (!cinode->can_cache_brlcks)
1020 flock->fl_type = F_UNLCK;
1022 up_read(&cinode->lock_sem);
1027 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1029 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1030 cifs_down_write(&cinode->lock_sem);
1031 list_add_tail(&lock->llist, &cfile->llist->locks);
1032 up_write(&cinode->lock_sem);
1036 * Set the byte-range lock (mandatory style). Returns:
1037 * 1) 0, if we set the lock and don't need to request to the server;
1038 * 2) 1, if no locks prevent us but we need to request to the server;
1039 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
/*
 * NOTE(review): line-numbered fragment; the return-type line, the `wait`
 * parameter, rc/exist declarations, the conflict/wait decision branches
 * and the return are missing from this view.
 */
1042 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1045 struct cifsLockInfo *conf_lock;
1046 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1052 cifs_down_write(&cinode->lock_sem);
1054 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1055 lock->type, &conf_lock, CIFS_LOCK_OP);
/* no conflict and brlocks cacheable: record locally, done */
1056 if (!exist && cinode->can_cache_brlcks) {
1057 list_add_tail(&lock->llist, &cfile->llist->locks);
1058 up_write(&cinode->lock_sem);
/* conflict + wait: queue on the blocker and sleep until woken */
1067 list_add_tail(&lock->blist, &conf_lock->blist);
1068 up_write(&cinode->lock_sem);
1069 rc = wait_event_interruptible(lock->block_q,
1070 (lock->blist.prev == &lock->blist) &&
1071 (lock->blist.next == &lock->blist));
1074 cifs_down_write(&cinode->lock_sem);
1075 list_del_init(&lock->blist);
1078 up_write(&cinode->lock_sem);
1083 * Check if there is another lock that prevents us to set the lock (posix
1084 * style). If such a lock exists, update the flock structure with its
1085 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1086 * or leave it the same if we can't. Returns 0 if we don't need to request to
1087 * the server or 1 otherwise.
/*
 * NOTE(review): line-numbered fragment; the return-type line, the rc
 * declaration/assignment and the return are missing from this view.
 */
1090 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1093 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1094 unsigned char saved_type = flock->fl_type;
/* only FL_POSIX locks are handled here */
1096 if ((flock->fl_flags & FL_POSIX) == 0)
1099 down_read(&cinode->lock_sem);
/* let the VFS posix-lock machinery check local (cached) locks */
1100 posix_test_lock(file, flock);
/* no local conflict and can't cache: server must still be consulted */
1102 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1103 flock->fl_type = saved_type;
1107 up_read(&cinode->lock_sem);
1112 * Set the byte-range lock (posix style). Returns:
1113 * 1) 0, if we set the lock and don't need to request to the server;
1114 * 2) 1, if we need to request to the server;
1115 * 3) <0, if the error occurs while setting the lock.
/*
 * NOTE(review): line-numbered fragment; the return-type line, the rc
 * declaration, the early returns and the final return are missing from
 * this view.
 */
1118 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1120 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1123 if ((flock->fl_flags & FL_POSIX) == 0)
1127 cifs_down_write(&cinode->lock_sem);
/* can't cache brlocks: the server must handle this lock */
1128 if (!cinode->can_cache_brlcks) {
1129 up_write(&cinode->lock_sem);
1133 rc = posix_lock_file(file, flock, NULL);
1134 up_write(&cinode->lock_sem);
/* deferred: wait (interruptibly) until the blocking lock goes away */
1135 if (rc == FILE_LOCK_DEFERRED) {
1136 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1139 posix_unblock_lock(flock);
/*
 * cifs_push_mandatory_locks - send all cached mandatory byte-range locks
 * for @cfile to the server in LOCKING_ANDX batches: one pass for exclusive
 * locks, one for shared, each batch capped by the negotiated maxBuf.
 *
 * NOTE(review): line-numbered fragment; xid/i/num/cur initialization,
 * error-return paths, the kfree(buf) and the function's return are on
 * lines missing from this view.
 */
1145 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1148 int rc = 0, stored_rc;
1149 struct cifsLockInfo *li, *tmp;
1150 struct cifs_tcon *tcon;
1151 unsigned int num, max_num, max_buf;
1152 LOCKING_ANDX_RANGE *buf, *cur;
1153 static const int types[] = {
1154 LOCKING_ANDX_LARGE_FILES,
1155 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1160 tcon = tlink_tcon(cfile->tlink);
1163 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1164 * and check it before using.
1166 max_buf = tcon->ses->server->maxBuf;
1167 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1172 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
/* clamp batch size to what fits in one SMB after the header */
1174 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1176 max_num = (max_buf - sizeof(struct smb_hdr)) /
1177 sizeof(LOCKING_ANDX_RANGE);
1178 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
/* two passes: exclusive locks, then shared locks */
1184 for (i = 0; i < 2; i++) {
1187 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1188 if (li->type != types[i])
1190 cur->Pid = cpu_to_le16(li->pid);
1191 cur->LengthLow = cpu_to_le32((u32)li->length);
1192 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1193 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1194 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
/* batch full: flush it to the server and restart the buffer */
1195 if (++num == max_num) {
1196 stored_rc = cifs_lockv(xid, tcon,
1198 (__u8)li->type, 0, num,
/* flush any remaining partial batch for this lock type */
1209 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1210 (__u8)types[i], 0, num, buf);
1222 hash_lockowner(fl_owner_t owner)
1224 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
/*
 * One POSIX byte-range lock queued for transmission to the server by
 * cifs_push_posix_locks().  NOTE(review): the remaining members (offset,
 * length, pid, netfid, type — per the assignments below) are on lines
 * missing from this view.
 */
1227 struct lock_to_push {
1228 struct list_head llist;
/*
 * cifs_push_posix_locks - replay all cached VFS POSIX locks on the inode
 * to the server.  Counts the locks under flc_lock, pre-allocates one
 * lock_to_push per lock (no new FL_POSIX locks can appear while lock_sem
 * is held), fills them in a second pass under flc_lock, then sends each
 * via CIFSSMBPosixLock outside the spinlock.
 *
 * NOTE(review): line-numbered fragment; the return-type line, count++,
 * type assignments, error paths, the cleanup label and the return are on
 * lines missing from this view.
 */
1237 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1239 struct inode *inode = d_inode(cfile->dentry);
1240 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1241 struct file_lock *flock;
1242 struct file_lock_context *flctx = inode->i_flctx;
1243 unsigned int count = 0, i;
1244 int rc = 0, xid, type;
1245 struct list_head locks_to_send, *el;
1246 struct lock_to_push *lck, *tmp;
/* pass 1: count the POSIX locks currently held on the inode */
1254 spin_lock(&flctx->flc_lock);
1255 list_for_each(el, &flctx->flc_posix) {
1258 spin_unlock(&flctx->flc_lock);
1260 INIT_LIST_HEAD(&locks_to_send);
1263 * Allocating count locks is enough because no FL_POSIX locks can be
1264 * added to the list while we are holding cinode->lock_sem that
1265 * protects locking operations of this inode.
1267 for (i = 0; i < count; i++) {
1268 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1273 list_add_tail(&lck->llist, &locks_to_send);
/* pass 2: fill the pre-allocated entries under flc_lock */
1276 el = locks_to_send.next;
1277 spin_lock(&flctx->flc_lock);
1278 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1279 if (el == &locks_to_send) {
1281 * The list ended. We don't have enough allocated
1282 * structures - something is really wrong.
1284 cifs_dbg(VFS, "Can't push all brlocks!\n");
1287 length = 1 + flock->fl_end - flock->fl_start;
1288 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1292 lck = list_entry(el, struct lock_to_push, llist);
1293 lck->pid = hash_lockowner(flock->fl_owner);
1294 lck->netfid = cfile->fid.netfid;
1295 lck->length = length;
1297 lck->offset = flock->fl_start;
1299 spin_unlock(&flctx->flc_lock);
/* pass 3: transmit each lock, freeing entries as we go */
1301 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1304 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1305 lck->offset, lck->length, NULL,
1309 list_del(&lck->llist);
/* error cleanup: free any entries not yet sent */
1317 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1318 list_del(&lck->llist);
/*
 * Flush all locally cached byte-range locks for @cfile to the server,
 * choosing the POSIX path (unix extensions + FCNTL capability and
 * posix brlocks not disabled by mount option) or the mandatory-lock
 * path otherwise.  Clears can_cache_brlcks so subsequent lock requests
 * go straight to the server.
 */
1325 cifs_push_locks(struct cifsFileInfo *cfile)
1327 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1328 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1329 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1332 /* we are going to update can_cache_brlcks here - need a write access */
1333 cifs_down_write(&cinode->lock_sem);
/* Nothing cached (already pushed) — nothing to do. */
1334 if (!cinode->can_cache_brlcks) {
1335 up_write(&cinode->lock_sem);
1339 if (cap_unix(tcon->ses) &&
1340 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1341 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1342 rc = cifs_push_posix_locks(cfile);
1344 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1346 cinode->can_cache_brlcks = false;
1347 up_write(&cinode->lock_sem);
/*
 * Decode a VFS struct file_lock into CIFS terms: log the flag bits,
 * set *type to the server-specific lock type mask, and (in lines not
 * visible in this extract) set *lock/*unlock and *wait_flag for the
 * caller.  Pure translation — no server traffic happens here.
 */
1352 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1353 bool *wait_flag, struct TCP_Server_Info *server)
/* Debug-log which lock flavor/flag bits the VFS handed us. */
1355 if (flock->fl_flags & FL_POSIX)
1356 cifs_dbg(FYI, "Posix\n");
1357 if (flock->fl_flags & FL_FLOCK)
1358 cifs_dbg(FYI, "Flock\n");
1359 if (flock->fl_flags & FL_SLEEP) {
1360 cifs_dbg(FYI, "Blocking lock\n");
1363 if (flock->fl_flags & FL_ACCESS)
1364 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1365 if (flock->fl_flags & FL_LEASE)
1366 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
/* Any bit outside the known set is logged as unknown. */
1367 if (flock->fl_flags &
1368 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1369 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1370 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
/* Map the VFS lock type onto the server's lock-type bit mask. */
1372 *type = server->vals->large_lock_type;
1373 if (flock->fl_type == F_WRLCK) {
1374 cifs_dbg(FYI, "F_WRLCK\n");
1375 *type |= server->vals->exclusive_lock_type;
1377 } else if (flock->fl_type == F_UNLCK) {
1378 cifs_dbg(FYI, "F_UNLCK\n");
1379 *type |= server->vals->unlock_lock_type;
1381 /* Check if unlock includes more than one lock range */
1382 } else if (flock->fl_type == F_RDLCK) {
1383 cifs_dbg(FYI, "F_RDLCK\n");
1384 *type |= server->vals->shared_lock_type;
1386 } else if (flock->fl_type == F_EXLCK) {
1387 cifs_dbg(FYI, "F_EXLCK\n");
1388 *type |= server->vals->exclusive_lock_type;
1390 } else if (flock->fl_type == F_SHLCK) {
1391 cifs_dbg(FYI, "F_SHLCK\n");
1392 *type |= server->vals->shared_lock_type;
1395 cifs_dbg(FYI, "Unknown type of lock\n");
/*
 * Implement F_GETLK: test whether the range described by @flock could
 * be locked.  POSIX path uses CIFSSMBPosixLock in test mode; mandatory
 * path probes with a lock/unlock pair and reports conflicts by
 * rewriting flock->fl_type.
 *
 * NOTE(review): several intermediate lines (early returns, conflict
 * bookkeeping) are missing from this extract.
 */
1399 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1400 bool wait_flag, bool posix_lck, unsigned int xid)
/* fl_end is inclusive, hence +1 for the byte length. */
1403 __u64 length = 1 + flock->fl_end - flock->fl_start;
1404 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1405 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1406 struct TCP_Server_Info *server = tcon->ses->server;
1407 __u16 netfid = cfile->fid.netfid;
1410 int posix_lock_type;
/* Try the local cache first; only go to the server if inconclusive. */
1412 rc = cifs_posix_lock_test(file, flock);
1416 if (type & server->vals->shared_lock_type)
1417 posix_lock_type = CIFS_RDLCK;
1419 posix_lock_type = CIFS_WRLCK;
1420 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1421 hash_lockowner(flock->fl_owner),
1422 flock->fl_start, length, flock,
1423 posix_lock_type, wait_flag);
/* Mandatory-lock path: consult the local lock list first. */
1427 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1431 /* BB we could chain these into one lock request BB */
/* Probe exclusively: if the lock succeeds, immediately undo it. */
1432 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1435 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1437 flock->fl_type = F_UNLCK;
1439 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
/* Exclusive probe failed; for a shared request report a write conflict. */
1444 if (type & server->vals->shared_lock_type) {
1445 flock->fl_type = F_WRLCK;
/* Retry the probe as a shared lock to distinguish read vs write conflict. */
1449 type &= ~server->vals->exclusive_lock_type;
1451 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1452 type | server->vals->shared_lock_type,
1455 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1456 type | server->vals->shared_lock_type, 0, 1, false);
1457 flock->fl_type = F_RDLCK;
1459 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1462 flock->fl_type = F_WRLCK;
/*
 * Move every entry from @source onto @dest, leaving @source empty.
 * Uses the _safe iterator because list_move() unlinks the current node.
 */
1468 cifs_move_llist(struct list_head *source, struct list_head *dest)
1470 struct list_head *li, *tmp;
1471 list_for_each_safe(li, tmp, source)
1472 list_move(li, dest);
/*
 * Free every cifsLockInfo on @llist, first waking any waiters blocked
 * on each lock.  (The kfree of each entry is on a line missing from
 * this extract.)
 */
1476 cifs_free_llist(struct list_head *llist)
1478 struct cifsLockInfo *li, *tmp;
1479 list_for_each_entry_safe(li, tmp, llist, llist) {
1480 cifs_del_lock_waiters(li);
1481 list_del(&li->llist);
/*
 * SMB1 unlock of a byte range: walk the file's cached lock list, batch
 * all locks falling inside [fl_start, fl_start+length) into
 * LOCKING_ANDX_RANGE arrays (one pass per lock type), and send them
 * with cifs_lockv().  Unlocked entries are parked on tmp_llist so they
 * can be re-added to the file's list if the server request fails.
 *
 * NOTE(review): intermediate lines (error returns, loop bookkeeping)
 * are missing from this extract.
 */
1487 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1490 int rc = 0, stored_rc;
/* Two passes: exclusive large-file locks, then shared large-file locks. */
1491 static const int types[] = {
1492 LOCKING_ANDX_LARGE_FILES,
1493 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1496 unsigned int max_num, num, max_buf;
1497 LOCKING_ANDX_RANGE *buf, *cur;
1498 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1499 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1500 struct cifsLockInfo *li, *tmp;
1501 __u64 length = 1 + flock->fl_end - flock->fl_start;
1502 struct list_head tmp_llist;
1504 INIT_LIST_HEAD(&tmp_llist);
1507 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1508 * and check it before using.
1510 max_buf = tcon->ses->server->maxBuf;
1511 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1514 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
/* How many lock ranges fit into a single SMB after the header. */
1516 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1518 max_num = (max_buf - sizeof(struct smb_hdr)) /
1519 sizeof(LOCKING_ANDX_RANGE);
1520 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1524 cifs_down_write(&cinode->lock_sem);
1525 for (i = 0; i < 2; i++) {
1528 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* Skip locks not fully contained in the requested unlock range. */
1529 if (flock->fl_start > li->offset ||
1530 (flock->fl_start + length) <
1531 (li->offset + li->length))
1533 if (current->tgid != li->pid)
1535 if (types[i] != li->type)
1537 if (cinode->can_cache_brlcks) {
1539 * We can cache brlock requests - simply remove
1540 * a lock from the file's list.
1542 list_del(&li->llist);
1543 cifs_del_lock_waiters(li);
/* Fill the next LOCKING_ANDX_RANGE slot with this lock. */
1547 cur->Pid = cpu_to_le16(li->pid);
1548 cur->LengthLow = cpu_to_le32((u32)li->length);
1549 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1550 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1551 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1553 * We need to save a lock here to let us add it again to
1554 * the file's list if the unlock range request fails on
1557 list_move(&li->llist, &tmp_llist);
/* Buffer full: send this batch of unlocks now. */
1558 if (++num == max_num) {
1559 stored_rc = cifs_lockv(xid, tcon,
1561 li->type, num, 0, buf);
1564 * We failed on the unlock range
1565 * request - add all locks from the tmp
1566 * list to the head of the file's list.
1568 cifs_move_llist(&tmp_llist,
1569 &cfile->llist->locks);
1573 * The unlock range request succeed -
1574 * free the tmp list.
1576 cifs_free_llist(&tmp_llist);
/* Flush any partially-filled batch left after the walk. */
1583 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1584 types[i], num, 0, buf);
1586 cifs_move_llist(&tmp_llist,
1587 &cfile->llist->locks);
1590 cifs_free_llist(&tmp_llist);
1594 up_write(&cinode->lock_sem);
/*
 * Implement F_SETLK/F_SETLKW: set or clear the byte range described by
 * @flock.  POSIX path goes through CIFSSMBPosixLock; mandatory path
 * allocates a cifsLockInfo, checks for local conflicts, then sends the
 * lock (or delegates unlock to mand_unlock_range).  Finally mirrors the
 * result into the local VFS lock list via locks_lock_file_wait().
 *
 * NOTE(review): intermediate lines (error propagation, caching branch)
 * are missing from this extract.
 */
1600 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1601 bool wait_flag, bool posix_lck, int lock, int unlock,
1605 __u64 length = 1 + flock->fl_end - flock->fl_start;
1606 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1607 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1608 struct TCP_Server_Info *server = tcon->ses->server;
1609 struct inode *inode = d_inode(cfile->dentry);
1612 int posix_lock_type;
/* POSIX path: update the local posix lock state, then tell the server. */
1614 rc = cifs_posix_lock_set(file, flock);
1618 if (type & server->vals->shared_lock_type)
1619 posix_lock_type = CIFS_RDLCK;
1621 posix_lock_type = CIFS_WRLCK;
1624 posix_lock_type = CIFS_UNLCK;
1626 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1627 hash_lockowner(flock->fl_owner),
1628 flock->fl_start, length,
1629 NULL, posix_lock_type, wait_flag);
/* Mandatory path: build a local lock record for the requested range. */
1634 struct cifsLockInfo *lock;
1636 lock = cifs_lock_init(flock->fl_start, length, type);
1640 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1649 * Windows 7 server can delay breaking lease from read to None
1650 * if we set a byte-range lock on a file - break it explicitly
1651 * before sending the lock to the server to be sure the next
1652 * read won't conflict with non-overlapted locks due to
1655 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1656 CIFS_CACHE_READ(CIFS_I(inode))) {
1657 cifs_zap_mapping(inode);
1658 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1660 CIFS_I(inode)->oplock = 0;
1663 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1664 type, 1, 0, wait_flag);
1670 cifs_lock_add(cfile, lock);
1672 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1675 if (flock->fl_flags & FL_POSIX) {
1677 * If this is a request to remove all locks because we
1678 * are closing the file, it doesn't matter if the
1679 * unlocking failed as both cifs.ko and the SMB server
1680 * remove the lock on file close
1683 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1684 if (!(flock->fl_flags & FL_CLOSE))
/* Record the successful lock/unlock in the local VFS lock table. */
1687 rc = locks_lock_file_wait(file, flock);
/*
 * Top-level file_operations ->lock handler.  Decodes the request via
 * cifs_read_flock(), decides whether the POSIX lock protocol can be
 * used for this mount, and dispatches to cifs_getlk() (F_GETLK) or
 * cifs_setlk() (F_SETLK/F_SETLKW).
 */
1692 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1695 int lock = 0, unlock = 0;
1696 bool wait_flag = false;
1697 bool posix_lck = false;
1698 struct cifs_sb_info *cifs_sb;
1699 struct cifs_tcon *tcon;
1700 struct cifsInodeInfo *cinode;
1701 struct cifsFileInfo *cfile;
1708 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1709 cmd, flock->fl_flags, flock->fl_type,
1710 flock->fl_start, flock->fl_end);
1712 cfile = (struct cifsFileInfo *)file->private_data;
1713 tcon = tlink_tcon(cfile->tlink);
/* Translate the VFS request into lock type / lock vs unlock / blocking. */
1715 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1718 cifs_sb = CIFS_FILE_SB(file);
1719 netfid = cfile->fid.netfid;
1720 cinode = CIFS_I(file_inode(file));
/* POSIX locking requires unix extensions, FCNTL cap, and no mount opt-out. */
1722 if (cap_unix(tcon->ses) &&
1723 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1724 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1727 * BB add code here to normalize offset and length to account for
1728 * negative length which we can not accept over the wire.
1730 if (IS_GETLK(cmd)) {
1731 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1736 if (!lock && !unlock) {
1738 * if no lock or unlock then nothing to do since we do not
1745 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1752 * update the file size (if needed) after a write. Should be called with
1753 * the inode->i_lock held
/* Advances the cached server EOF monotonically; never shrinks it. */
1756 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1757 unsigned int bytes_written)
1759 loff_t end_of_write = offset + bytes_written;
1761 if (end_of_write > cifsi->server_eof)
1762 cifsi->server_eof = end_of_write;
/*
 * Synchronously write @write_size bytes from @write_data at *@offset
 * using the server's sync_write op, looping until everything is sent.
 * Retries on -EAGAIN, reopening an invalidated handle first.  Updates
 * the cached server EOF, *offset, tcon write stats, and the in-core
 * inode size.  Returns the total number of bytes written.
 *
 * NOTE(review): some intermediate lines (rc/error exits, iov array
 * declaration, write-size clamp) are missing from this extract.
 */
1766 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1767 size_t write_size, loff_t *offset)
1770 unsigned int bytes_written = 0;
1771 unsigned int total_written;
1772 struct cifs_sb_info *cifs_sb;
1773 struct cifs_tcon *tcon;
1774 struct TCP_Server_Info *server;
1776 struct dentry *dentry = open_file->dentry;
1777 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1778 struct cifs_io_parms io_parms;
1780 cifs_sb = CIFS_SB(dentry->d_sb);
1782 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1783 write_size, *offset, dentry);
1785 tcon = tlink_tcon(open_file->tlink);
1786 server = tcon->ses->server;
/* Protocol dialect must support synchronous writes. */
1788 if (!server->ops->sync_write)
1793 for (total_written = 0; write_size > total_written;
1794 total_written += bytes_written) {
/* -EAGAIN means reconnect/retry: possibly reopen the handle first. */
1796 while (rc == -EAGAIN) {
1800 if (open_file->invalidHandle) {
1801 /* we could deadlock if we called
1802 filemap_fdatawait from here so tell
1803 reopen_file not to flush data to
1805 rc = cifs_reopen_file(open_file, false);
/* Clamp each chunk to the server's retry-safe write size. */
1810 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1811 (unsigned int)write_size - total_written);
1812 /* iov[0] is reserved for smb header */
1813 iov[1].iov_base = (char *)write_data + total_written;
1814 iov[1].iov_len = len;
1816 io_parms.tcon = tcon;
1817 io_parms.offset = *offset;
1818 io_parms.length = len;
1819 rc = server->ops->sync_write(xid, &open_file->fid,
1820 &io_parms, &bytes_written, iov, 1);
1822 if (rc || (bytes_written == 0)) {
/* Track server EOF and advance the caller's offset under i_lock. */
1830 spin_lock(&d_inode(dentry)->i_lock);
1831 cifs_update_eof(cifsi, *offset, bytes_written);
1832 spin_unlock(&d_inode(dentry)->i_lock);
1833 *offset += bytes_written;
1837 cifs_stats_bytes_written(tcon, total_written);
1839 if (total_written > 0) {
1840 spin_lock(&d_inode(dentry)->i_lock);
1841 if (*offset > d_inode(dentry)->i_size)
1842 i_size_write(d_inode(dentry), *offset);
1843 spin_unlock(&d_inode(dentry)->i_lock);
1845 mark_inode_dirty_sync(d_inode(dentry));
1847 return total_written;
/*
 * Return a referenced open file handle for @cifs_inode that permits
 * reading, or NULL-equivalent if none is usable.  On multiuser mounts
 * @fsuid_only restricts the search to handles opened by the current
 * fsuid.  The returned handle has its refcount raised via
 * cifsFileInfo_get(); the caller must put it.
 */
1850 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1853 struct cifsFileInfo *open_file = NULL;
1854 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1855 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1857 /* only filter by fsuid on multiuser mounts */
1858 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1861 spin_lock(&tcon->open_file_lock);
1862 /* we could simply get the first_list_entry since write-only entries
1863 are always at the end of the list but since the first entry might
1864 have a close pending, we go through the whole list */
1865 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1866 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1868 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1869 if (!open_file->invalidHandle) {
1870 /* found a good file */
1871 /* lock it so it will not be closed on us */
1872 cifsFileInfo_get(open_file);
1873 spin_unlock(&tcon->open_file_lock);
1875 } /* else might as well continue, and look for
1876 another, or simply have the caller reopen it
1877 again rather than trying to fix this handle */
1878 } else /* write only file */
1879 break; /* write only files are last so must be done */
1881 spin_unlock(&tcon->open_file_lock);
/*
 * Return a referenced writable open handle for @cifs_inode.  Prefers a
 * valid handle owned by the current tgid; falls back to any available
 * handle, and as a last resort tries to reopen an invalidated handle
 * (up to MAX_REOPEN_ATT attempts).  The returned handle must be
 * released with cifsFileInfo_put().
 *
 * NOTE(review): some intermediate lines (refind loop labels, reopen
 * success path) are missing from this extract.
 */
1885 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1888 struct cifsFileInfo *open_file, *inv_file = NULL;
1889 struct cifs_sb_info *cifs_sb;
1890 struct cifs_tcon *tcon;
1891 bool any_available = false;
1893 unsigned int refind = 0;
1895 /* Having a null inode here (because mapping->host was set to zero by
1896 the VFS or MM) should not happen but we had reports of on oops (due to
1897 it being zero) during stress testcases so we need to check for it */
1899 if (cifs_inode == NULL) {
1900 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1905 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1906 tcon = cifs_sb_master_tcon(cifs_sb);
1908 /* only filter by fsuid on multiuser mounts */
1909 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1912 spin_lock(&tcon->open_file_lock);
/* Give up after too many reopen attempts. */
1914 if (refind > MAX_REOPEN_ATT) {
1915 spin_unlock(&tcon->open_file_lock);
1918 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1919 if (!any_available && open_file->pid != current->tgid)
1921 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1923 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1924 if (!open_file->invalidHandle) {
1925 /* found a good writable file */
1926 cifsFileInfo_get(open_file);
1927 spin_unlock(&tcon->open_file_lock);
/* Remember an invalidated candidate for a possible reopen below. */
1931 inv_file = open_file;
1935 /* couldn't find useable FH with same pid, try any available */
1936 if (!any_available) {
1937 any_available = true;
1938 goto refind_writable;
1942 any_available = false;
1943 cifsFileInfo_get(inv_file);
1946 spin_unlock(&tcon->open_file_lock);
/* Try to revalidate the invalidated handle outside the spinlock. */
1949 rc = cifs_reopen_file(inv_file, false);
/* Reopen failed: demote this handle to the tail and retry the search. */
1953 spin_lock(&tcon->open_file_lock);
1954 list_move_tail(&inv_file->flist,
1955 &cifs_inode->openFileList);
1956 spin_unlock(&tcon->open_file_lock);
1957 cifsFileInfo_put(inv_file);
1960 spin_lock(&tcon->open_file_lock);
1961 goto refind_writable;
/*
 * Write bytes [from, to) of @page back to the server synchronously.
 * Maps the page, clamps the range against the current file size (so a
 * partial-page writeback never extends the file), finds a writable
 * handle and delegates to cifs_write().
 *
 * NOTE(review): the kunmap calls and final return paths are on lines
 * missing from this extract.
 */
1968 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1970 struct address_space *mapping = page->mapping;
1971 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1974 int bytes_written = 0;
1975 struct inode *inode;
1976 struct cifsFileInfo *open_file;
1978 if (!mapping || !mapping->host)
1981 inode = page->mapping->host;
1983 offset += (loff_t)from;
1984 write_data = kmap(page);
1987 if ((to > PAGE_SIZE) || (from > to)) {
1992 /* racing with truncate? */
1993 if (offset > mapping->host->i_size) {
1995 return 0; /* don't care */
1998 /* check to make sure that we are not extending the file */
1999 if (mapping->host->i_size - offset < (loff_t)to)
2000 to = (unsigned)(mapping->host->i_size - offset);
2002 open_file = find_writable_file(CIFS_I(mapping->host), false);
2004 bytes_written = cifs_write(open_file, open_file->pid,
2005 write_data, to - from, &offset);
2006 cifsFileInfo_put(open_file);
2007 /* Does mm or vfs already set times? */
2008 inode->i_atime = inode->i_mtime = current_time(inode);
2009 if ((bytes_written > 0) && (offset))
2011 else if (bytes_written < 0)
2014 cifs_dbg(FYI, "No writeable filehandles for inode\n");
/*
 * Allocate a cifs_writedata sized for up to @tofind pages and fill its
 * page array with dirty pages from @mapping starting at *@index, up to
 * @end.  *@found_pages accumulates the number of pages collected.
 * Returns the wdata (or, per lines missing here, NULL on allocation
 * failure — TODO confirm against full source).
 */
2022 static struct cifs_writedata *
2023 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2024 pgoff_t end, pgoff_t *index,
2025 unsigned int *found_pages)
2027 unsigned int nr_pages;
2028 struct page **pages;
2029 struct cifs_writedata *wdata;
2031 wdata = cifs_writedata_alloc((unsigned int)tofind,
2032 cifs_writev_complete);
2037 * find_get_pages_tag seems to return a max of 256 on each
2038 * iteration, so we must call it several times in order to
2039 * fill the array or the wsize is effectively limited to
2043 pages = wdata->pages;
2045 nr_pages = find_get_pages_tag(mapping, index,
2046 PAGECACHE_TAG_DIRTY, tofind,
2048 *found_pages += nr_pages;
2051 } while (nr_pages && tofind && *index <= end);
/*
 * Filter the @found_pages candidates in @wdata down to a consecutive
 * run of lockable, still-dirty pages suitable for one write request:
 * lock each page, re-check mapping/index/dirtiness, mark it writeback,
 * and stop at the first gap.  Pages that don't qualify are unlocked
 * (on lines missing here) and released.  Returns the number of pages
 * kept; *next/*index/*done are updated for the caller's scan loop.
 */
2057 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2058 struct address_space *mapping,
2059 struct writeback_control *wbc,
2060 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2062 unsigned int nr_pages = 0, i;
2065 for (i = 0; i < found_pages; i++) {
2066 page = wdata->pages[i];
2068 * At this point we hold neither mapping->tree_lock nor
2069 * lock on the page itself: the page may be truncated or
2070 * invalidated (changing page->mapping to NULL), or even
2071 * swizzled back from swapper_space to tmpfs file
2077 else if (!trylock_page(page))
2080 if (unlikely(page->mapping != mapping)) {
2085 if (!wbc->range_cyclic && page->index > end) {
2091 if (*next && (page->index != *next)) {
2092 /* Not next consecutive page */
2097 if (wbc->sync_mode != WB_SYNC_NONE)
2098 wait_on_page_writeback(page);
2100 if (PageWriteback(page) ||
2101 !clear_page_dirty_for_io(page)) {
2107 * This actually clears the dirty bit in the radix tree.
2108 * See cifs_writepage() for more commentary.
2110 set_page_writeback(page);
/* Page lies entirely past EOF: nothing to send for it. */
2111 if (page_offset(page) >= i_size_read(mapping->host)) {
2114 end_page_writeback(page);
2118 wdata->pages[i] = page;
2119 *next = page->index + 1;
2123 /* reset index to refind any pages skipped */
2125 *index = wdata->pages[0]->index + 1;
2127 /* put any pages we aren't going to use */
2128 for (i = nr_pages; i < found_pages; i++) {
2129 put_page(wdata->pages[i]);
2130 wdata->pages[i] = NULL;
/*
 * Finalize @wdata (offsets, sizes, tail length clipped to EOF), attach
 * a writable file handle, and submit the async write.  On failure all
 * pages are unlocked so writeback can retry them.  Returns the rc of
 * the submission path.
 */
2137 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2138 struct address_space *mapping, struct writeback_control *wbc)
2141 struct TCP_Server_Info *server;
2144 wdata->sync_mode = wbc->sync_mode;
2145 wdata->nr_pages = nr_pages;
2146 wdata->offset = page_offset(wdata->pages[0]);
2147 wdata->pagesz = PAGE_SIZE;
/* Last page may extend past EOF: clamp its length to the file size. */
2148 wdata->tailsz = min(i_size_read(mapping->host) -
2149 page_offset(wdata->pages[nr_pages - 1]),
2151 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
/* Drop any handle from a prior attempt before picking a fresh one. */
2153 if (wdata->cfile != NULL)
2154 cifsFileInfo_put(wdata->cfile);
2155 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2156 if (!wdata->cfile) {
2157 cifs_dbg(VFS, "No writable handles for inode\n");
2160 wdata->pid = wdata->cfile->pid;
2161 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2162 rc = server->ops->async_writev(wdata, cifs_writedata_release);
/* Error path: release page locks taken by wdata_prepare_pages(). */
2165 for (i = 0; i < nr_pages; ++i)
2166 unlock_page(wdata->pages[i]);
/*
 * address_space ->writepages: write dirty pages in large batches via
 * async writev.  Each iteration reserves write credits, gathers up to
 * wsize worth of consecutive dirty pages, and submits them; on send
 * failure the pages are redirtied or errored.  Falls back to
 * generic_writepages() when wsize is below one page.
 *
 * NOTE(review): intermediate lines (retry bookkeeping, loop exits)
 * are missing from this extract.
 */
2171 static int cifs_writepages(struct address_space *mapping,
2172 struct writeback_control *wbc)
2174 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2175 struct TCP_Server_Info *server;
2176 bool done = false, scanned = false, range_whole = false;
2178 struct cifs_writedata *wdata;
2182 * If wsize is smaller than the page cache size, default to writing
2183 * one page at a time via cifs_writepage
2185 if (cifs_sb->wsize < PAGE_SIZE)
2186 return generic_writepages(mapping, wbc);
2188 if (wbc->range_cyclic) {
2189 index = mapping->writeback_index; /* Start from prev offset */
2192 index = wbc->range_start >> PAGE_SHIFT;
2193 end = wbc->range_end >> PAGE_SHIFT;
2194 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2198 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2200 while (!done && index <= end) {
2201 unsigned int i, nr_pages, found_pages, wsize, credits;
2202 pgoff_t next = 0, tofind, saved_index = index;
/* Block until the server grants credits for a wsize-sized write. */
2204 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2209 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2211 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2215 add_credits_and_wake_if(server, credits, 0);
2219 if (found_pages == 0) {
2220 kref_put(&wdata->refcount, cifs_writedata_release);
2221 add_credits_and_wake_if(server, credits, 0);
2225 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2226 end, &index, &next, &done);
2228 /* nothing to write? */
2229 if (nr_pages == 0) {
2230 kref_put(&wdata->refcount, cifs_writedata_release);
2231 add_credits_and_wake_if(server, credits, 0);
2235 wdata->credits = credits;
2237 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2239 /* send failure -- clean up the mess */
2241 add_credits_and_wake_if(server, wdata->credits, 0);
2242 for (i = 0; i < nr_pages; ++i) {
/* -EAGAIN: transient, keep the page dirty; otherwise report error. */
2244 redirty_page_for_writepage(wbc,
2247 SetPageError(wdata->pages[i]);
2248 end_page_writeback(wdata->pages[i]);
2249 put_page(wdata->pages[i]);
2252 mapping_set_error(mapping, rc);
2254 kref_put(&wdata->refcount, cifs_writedata_release);
/* In data-integrity mode retry the same index after -EAGAIN. */
2256 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2257 index = saved_index;
2261 wbc->nr_to_write -= nr_pages;
2262 if (wbc->nr_to_write <= 0)
2268 if (!scanned && !done) {
2270 * We hit the last page and there is more work to be done: wrap
2271 * back to the start of the file
2278 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2279 mapping->writeback_index = index;
/*
 * Write back a single locked page via cifs_partialpagewrite().  Marks
 * the page writeback before the send and ends writeback afterwards;
 * on -EAGAIN the page is redirtied (or retried under WB_SYNC_ALL, per
 * lines missing from this extract), other errors set a mapping error.
 */
2285 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2291 /* BB add check for wbc flags */
2293 if (!PageUptodate(page))
2294 cifs_dbg(FYI, "ppw - page not up to date\n");
2297 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2299 * A writepage() implementation always needs to do either this,
2300 * or re-dirty the page with "redirty_page_for_writepage()" in
2301 * the case of a failure.
2303 * Just unlocking the page will cause the radix tree tag-bits
2304 * to fail to update with the state of the page correctly.
2306 set_page_writeback(page);
2308 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2309 if (rc == -EAGAIN) {
2310 if (wbc->sync_mode == WB_SYNC_ALL)
2312 redirty_page_for_writepage(wbc, page);
2313 } else if (rc != 0) {
2315 mapping_set_error(page->mapping, rc);
2317 SetPageUptodate(page);
2319 end_page_writeback(page);
/*
 * address_space ->writepage wrapper: delegate to cifs_writepage_locked()
 * (page unlock happens on a line missing from this extract).
 */
2325 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2327 int rc = cifs_writepage_locked(page, wbc);
/*
 * address_space ->write_end: commit @copied bytes at @pos.  An uptodate
 * page is simply marked dirty for later writeback; a non-uptodate
 * partial-page copy is written through synchronously with cifs_write()
 * since we cannot mark an incomplete page uptodate.  Extends i_size
 * when the write goes past it.
 */
2332 static int cifs_write_end(struct file *file, struct address_space *mapping,
2333 loff_t pos, unsigned len, unsigned copied,
2334 struct page *page, void *fsdata)
2337 struct inode *inode = mapping->host;
2338 struct cifsFileInfo *cfile = file->private_data;
2339 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* RWPIDFORWARD mounts send the opener's pid; otherwise current tgid. */
2342 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2345 pid = current->tgid;
2347 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2350 if (PageChecked(page)) {
2352 SetPageUptodate(page);
2353 ClearPageChecked(page);
2354 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2355 SetPageUptodate(page);
2357 if (!PageUptodate(page)) {
2359 unsigned offset = pos & (PAGE_SIZE - 1);
2363 /* this is probably better than directly calling
2364 partialpage_write since in this function the file handle is
2365 known which we might as well leverage */
2366 /* BB check if anything else missing out of ppw
2367 such as updating last write time */
2368 page_data = kmap(page);
2369 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2370 /* if (rc < 0) should we set writebehind rc? */
2377 set_page_dirty(page);
2381 spin_lock(&inode->i_lock);
2382 if (pos > inode->i_size)
2383 i_size_write(inode, pos);
2384 spin_unlock(&inode->i_lock);
/*
 * fsync for strict cache mode: flush dirty pages in [start, end], zap
 * the page cache if we hold no read oplock/lease (so stale cached data
 * cannot survive), then ask the server to flush the handle unless the
 * nostrictsync mount flag is set.  Called with the inode lock held by
 * the caller (unlocked at the end here).
 */
2393 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2398 struct cifs_tcon *tcon;
2399 struct TCP_Server_Info *server;
2400 struct cifsFileInfo *smbfile = file->private_data;
2401 struct inode *inode = file_inode(file);
2402 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2404 rc = file_write_and_wait_range(file, start, end);
2411 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2414 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2415 rc = cifs_zap_mapping(inode);
2417 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2418 rc = 0; /* don't care about it in fsync */
2422 tcon = tlink_tcon(smbfile->tlink);
2423 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2424 server = tcon->ses->server;
2425 if (server->ops->flush)
2426 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2432 inode_unlock(inode);
/*
 * Non-strict fsync: flush the dirty page range locally and, unless the
 * nostrictsync mount flag is set, ask the server to flush the handle.
 * Unlike cifs_strict_fsync() this never invalidates the page cache.
 */
2436 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2440 struct cifs_tcon *tcon;
2441 struct TCP_Server_Info *server;
2442 struct cifsFileInfo *smbfile = file->private_data;
2443 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2444 struct inode *inode = file->f_mapping->host;
2446 rc = file_write_and_wait_range(file, start, end);
2453 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2456 tcon = tlink_tcon(smbfile->tlink);
2457 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2458 server = tcon->ses->server;
2459 if (server->ops->flush)
2460 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2466 inode_unlock(inode);
2471 * As file closes, flush all cached write data for this inode checking
2472 * for write behind errors.
/* Only write-mode files can have dirty data worth flushing. */
2474 int cifs_flush(struct file *file, fl_owner_t id)
2476 struct inode *inode = file_inode(file);
2479 if (file->f_mode & FMODE_WRITE)
2480 rc = filemap_write_and_wait(inode->i_mapping);
2482 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
/*
 * Allocate @num_pages highmem-capable pages into @pages.  On partial
 * failure the already-allocated count is recorded and ENOMEM returned
 * (partial cleanup loop visible at the end; intermediate lines are
 * missing from this extract).
 */
2488 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2493 for (i = 0; i < num_pages; i++) {
2494 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2497 * save number of pages we have already allocated and
2498 * return with ENOMEM error
2507 for (i = 0; i < num_pages; i++)
/*
 * Compute how many pages are needed for the next write chunk: clamp
 * @len to @wsize, round up to whole pages, and report the chunk length
 * through *@cur_len (assignment is on a line missing from this extract).
 */
2514 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2519 clen = min_t(const size_t, len, wsize);
2520 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
/*
 * kref release for uncached-write wdata: drop the aio ctx reference,
 * release every data page, then free the wdata itself via the common
 * cifs_writedata_release().
 */
2529 cifs_uncached_writedata_release(struct kref *refcount)
2532 struct cifs_writedata *wdata = container_of(refcount,
2533 struct cifs_writedata, refcount);
2535 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2536 for (i = 0; i < wdata->nr_pages; i++)
2537 put_page(wdata->pages[i]);
2538 cifs_writedata_release(refcount);
2541 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
/*
 * Work item run when an uncached async write finishes: update the
 * cached server EOF and in-core i_size under i_lock, signal the
 * waiter, kick result collection, and drop the wdata reference (which
 * may also be the last reference to the aio ctx).
 */
2544 cifs_uncached_writev_complete(struct work_struct *work)
2546 struct cifs_writedata *wdata = container_of(work,
2547 struct cifs_writedata, work);
2548 struct inode *inode = d_inode(wdata->cfile->dentry);
2549 struct cifsInodeInfo *cifsi = CIFS_I(inode);
2551 spin_lock(&inode->i_lock);
2552 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2553 if (cifsi->server_eof > inode->i_size)
2554 i_size_write(inode, cifsi->server_eof);
2555 spin_unlock(&inode->i_lock);
2557 complete(&wdata->done);
2558 collect_uncached_write_data(wdata->ctx);
2559 /* the below call can possibly free the last ref to aio ctx */
2560 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/*
 * Copy up to *@len bytes from the user iov_iter @from into @wdata's
 * pages.  Stops early on a short copy (likely an unmapped user range).
 * On return *@len is the number of bytes actually copied and
 * *@num_pages the number of pages used; an all-zero copy is reported
 * as -EFAULT (lines carrying those stores/returns are missing from
 * this extract).
 */
2564 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2565 size_t *len, unsigned long *num_pages)
2567 size_t save_len, copied, bytes, cur_len = *len;
2568 unsigned long i, nr_pages = *num_pages;
2571 for (i = 0; i < nr_pages; i++) {
2572 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2573 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2576 * If we didn't copy as much as we expected, then that
2577 * may mean we trod into an unmapped area. Stop copying
2578 * at that point. On the next pass through the big
2579 * loop, we'll likely end up getting a zero-length
2580 * write and bailing out of it.
2585 cur_len = save_len - cur_len;
2589 * If we have no data to send, then that probably means that
2590 * the copy above failed altogether. That's most likely because
2591 * the address in the iovec was bogus. Return -EFAULT and let
2592 * the caller free anything we allocated and bail out.
2598 * i + 1 now represents the number of pages we actually used in
2599 * the copy phase above.
/*
 * Break an uncached write of @len bytes at @offset into wsize-bounded
 * chunks: for each chunk reserve credits, allocate a wdata plus pages,
 * copy user data in, and submit an async writev.  Successfully queued
 * wdatas are appended to @wdata_list for collect_uncached_write_data();
 * on -EAGAIN the iterator is rewound so the chunk can be resent.
 *
 * NOTE(review): intermediate lines (loop condition on len, error
 * unwinding, final return) are missing from this extract.
 */
2606 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2607 struct cifsFileInfo *open_file,
2608 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2609 struct cifs_aio_ctx *ctx)
2613 unsigned long nr_pages, num_pages, i;
2614 struct cifs_writedata *wdata;
/* Keep a pristine copy of the iterator for -EAGAIN rewind. */
2615 struct iov_iter saved_from = *from;
2616 loff_t saved_offset = offset;
2618 struct TCP_Server_Info *server;
2620 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2621 pid = open_file->pid;
2623 pid = current->tgid;
2625 server = tlink_tcon(open_file->tlink)->ses->server;
2628 unsigned int wsize, credits;
2630 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2635 nr_pages = get_numpages(wsize, len, &cur_len);
2636 wdata = cifs_writedata_alloc(nr_pages,
2637 cifs_uncached_writev_complete);
2640 add_credits_and_wake_if(server, credits, 0);
2644 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2647 add_credits_and_wake_if(server, credits, 0);
2651 num_pages = nr_pages;
2652 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2654 for (i = 0; i < nr_pages; i++)
2655 put_page(wdata->pages[i]);
2657 add_credits_and_wake_if(server, credits, 0);
2662 * Bring nr_pages down to the number of pages we actually used,
2663 * and free any pages that we didn't use.
2665 for ( ; nr_pages > num_pages; nr_pages--)
2666 put_page(wdata->pages[nr_pages - 1]);
2668 wdata->sync_mode = WB_SYNC_ALL;
2669 wdata->nr_pages = nr_pages;
2670 wdata->offset = (__u64)offset;
2671 wdata->cfile = cifsFileInfo_get(open_file);
2673 wdata->bytes = cur_len;
2674 wdata->pagesz = PAGE_SIZE;
2675 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2676 wdata->credits = credits;
/* The wdata holds a reference on the aio ctx until release. */
2678 kref_get(&ctx->refcount);
2680 if (!wdata->cfile->invalidHandle ||
2681 !(rc = cifs_reopen_file(wdata->cfile, false)))
2682 rc = server->ops->async_writev(wdata,
2683 cifs_uncached_writedata_release);
2685 add_credits_and_wake_if(server, wdata->credits, 0);
2686 kref_put(&wdata->refcount,
2687 cifs_uncached_writedata_release);
2688 if (rc == -EAGAIN) {
/* Rewind the iterator to where this chunk started and retry. */
2690 iov_iter_advance(from, offset - saved_offset);
2696 list_add_tail(&wdata->list, wdata_list);
/*
 * Gather results of all outstanding uncached writes on @ctx->list.
 * Runs under ctx->aio_mutex; bails out (to be re-invoked by a later
 * completion) if any wdata has not completed yet.  Retryable failures
 * (-EAGAIN) are resent via cifs_write_from_iter() from a rewound copy
 * of the original iterator.  When the list drains: release the pinned
 * user pages, record total bytes in ctx->rc, and complete the iocb
 * (async) or the ctx->done completion (sync).
 */
2704 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2706 struct cifs_writedata *wdata, *tmp;
2707 struct cifs_tcon *tcon;
2708 struct cifs_sb_info *cifs_sb;
2709 struct dentry *dentry = ctx->cfile->dentry;
2713 tcon = tlink_tcon(ctx->cfile->tlink);
2714 cifs_sb = CIFS_SB(dentry->d_sb);
2716 mutex_lock(&ctx->aio_mutex);
2718 if (list_empty(&ctx->list)) {
2719 mutex_unlock(&ctx->aio_mutex);
2725 * Wait for and collect replies for any successful sends in order of
2726 * increasing offset. Once an error is hit, then return without waiting
2727 * for any more replies.
2730 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
/* Not finished yet: a future completion will call us again. */
2732 if (!try_wait_for_completion(&wdata->done)) {
2733 mutex_unlock(&ctx->aio_mutex);
2740 ctx->total_len += wdata->bytes;
2742 /* resend call if it's a retryable error */
2743 if (rc == -EAGAIN) {
2744 struct list_head tmp_list;
2745 struct iov_iter tmp_from = ctx->iter;
2747 INIT_LIST_HEAD(&tmp_list);
2748 list_del_init(&wdata->list);
/* Position a fresh iterator copy at this wdata's file offset. */
2750 iov_iter_advance(&tmp_from,
2751 wdata->offset - ctx->pos);
2753 rc = cifs_write_from_iter(wdata->offset,
2754 wdata->bytes, &tmp_from,
2755 ctx->cfile, cifs_sb, &tmp_list,
2758 list_splice(&tmp_list, &ctx->list);
2760 kref_put(&wdata->refcount,
2761 cifs_uncached_writedata_release);
2765 list_del_init(&wdata->list);
2766 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
/* Drop the user pages pinned for this aio request. */
2769 for (i = 0; i < ctx->npages; i++)
2770 put_page(ctx->bv[i].bv_page);
2772 cifs_stats_bytes_written(tcon, ctx->total_len);
2773 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2775 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2777 mutex_unlock(&ctx->aio_mutex);
2779 if (ctx->iocb && ctx->iocb->ki_complete)
2780 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2782 complete(&ctx->done);
/*
 * cifs_user_writev - uncached write path: copy the user iovec into an aio
 * context, fire off async write requests, then either return -EIOCBQUEUED
 * (async iocb) or wait for completion and return the bytes written.
 */
2785 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2787 struct file *file = iocb->ki_filp;
2788 ssize_t total_written = 0;
2789 struct cifsFileInfo *cfile;
2790 struct cifs_tcon *tcon;
2791 struct cifs_sb_info *cifs_sb;
2792 struct cifs_aio_ctx *ctx;
2793 struct iov_iter saved_from = *from;
2797 * BB - optimize the way when signing is disabled. We can drop this
2798 * extra memory-to-memory copying and use iovec buffers for constructing
2802 rc = generic_write_checks(iocb, from);
2806 cifs_sb = CIFS_FILE_SB(file);
2807 cfile = file->private_data;
2808 tcon = tlink_tcon(cfile->tlink);
/* this path requires async write support from the SMB dialect */
2810 if (!tcon->ses->server->ops->async_writev)
2813 ctx = cifs_aio_ctx_alloc();
2817 ctx->cfile = cifsFileInfo_get(cfile);
2819 if (!is_sync_kiocb(iocb))
2822 ctx->pos = iocb->ki_pos;
2824 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2826 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2830 /* grab a lock here because write response handlers can access ctx */
2831 mutex_lock(&ctx->aio_mutex);
2833 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2834 cfile, cifs_sb, &ctx->list, ctx);
2837 * If at least one write was successfully sent, then discard any rc
2838 * value from the later writes. If the other write succeeds, then
2839 * we'll end up returning whatever was written. If it fails, then
2840 * we'll get a new rc value from that.
2842 if (!list_empty(&ctx->list))
2845 mutex_unlock(&ctx->aio_mutex);
2848 kref_put(&ctx->refcount, cifs_aio_ctx_release);
/* async iocb: completion will be reported via ki_complete */
2852 if (!is_sync_kiocb(iocb)) {
2853 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2854 return -EIOCBQUEUED;
2857 rc = wait_for_completion_killable(&ctx->done);
/* interrupted: record -EINTR but still report any partial progress */
2859 mutex_lock(&ctx->aio_mutex);
2860 ctx->rc = rc = -EINTR;
2861 total_written = ctx->total_len;
2862 mutex_unlock(&ctx->aio_mutex);
2865 total_written = ctx->total_len;
2868 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2870 if (unlikely(!total_written))
2873 iocb->ki_pos += total_written;
2874 return total_written;
/*
 * cifs_writev - cached write used when we hold a write oplock/lease:
 * take lock_sem to exclude conflicting brlocks, then go through the
 * generic page-cache write path.
 */
2878 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2880 struct file *file = iocb->ki_filp;
2881 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2882 struct inode *inode = file->f_mapping->host;
2883 struct cifsInodeInfo *cinode = CIFS_I(inode);
2884 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2889 * We need to hold the sem to be sure nobody modifies lock list
2890 * with a brlock that prevents writing.
2892 down_read(&cinode->lock_sem);
2894 rc = generic_write_checks(iocb, from);
/* only write through the cache if no mandatory lock conflicts */
2898 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2899 server->vals->exclusive_lock_type, NULL,
2901 rc = __generic_file_write_iter(iocb, from);
2905 up_read(&cinode->lock_sem);
2906 inode_unlock(inode);
/* honour O_SYNC/O_DSYNC semantics on successful writes */
2909 rc = generic_write_sync(iocb, rc);
/*
 * cifs_strict_writev - strict cache mode write dispatcher: choose between
 * the generic cached path, the oplocked cifs_writev() path, or the
 * uncached cifs_user_writev() path depending on caching state.
 */
2914 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2916 struct inode *inode = file_inode(iocb->ki_filp);
2917 struct cifsInodeInfo *cinode = CIFS_I(inode);
2918 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2919 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2920 iocb->ki_filp->private_data;
2921 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
/* block while an oplock break is being processed */
2924 written = cifs_get_writer(cinode);
2928 if (CIFS_CACHE_WRITE(cinode)) {
2929 if (cap_unix(tcon->ses) &&
2930 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2931 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2932 written = generic_file_write_iter(iocb, from);
2935 written = cifs_writev(iocb, from);
2939 * For non-oplocked files in strict cache mode we need to write the data
2940 * to the server exactly from the pos to pos+len-1 rather than flush all
2941 * affected pages because it may cause an error with mandatory locks on
2942 * these pages but not on the region from pos to pos+len-1.
2944 written = cifs_user_writev(iocb, from);
2945 if (CIFS_CACHE_READ(cinode)) {
2947 * We have read level caching and we have just sent a write
2948 * request to the server thus making data in the cache stale.
2949 * Zap the cache and set oplock/lease level to NONE to avoid
2950 * reading stale data from the cache. All subsequent read
2951 * operations will read new data from the server.
2953 cifs_zap_mapping(inode);
2954 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
2959 cifs_put_writer(cinode);
/*
 * cifs_readdata_alloc - allocate a zeroed cifs_readdata with room for
 * @nr_pages page pointers, initialising its refcount, list, completion
 * and work item (@complete runs when the read finishes).
 */
2963 static struct cifs_readdata *
2964 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2966 struct cifs_readdata *rdata;
2968 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2970 if (rdata != NULL) {
2971 kref_init(&rdata->refcount);
2972 INIT_LIST_HEAD(&rdata->list);
2973 init_completion(&rdata->done);
2974 INIT_WORK(&rdata->work, complete);
/*
 * cifs_readdata_release - kref release: drop the file handle reference
 * held by the readdata and free it.
 */
2981 cifs_readdata_release(struct kref *refcount)
2983 struct cifs_readdata *rdata = container_of(refcount,
2984 struct cifs_readdata, refcount);
2987 cifsFileInfo_put(rdata->cfile);
/*
 * cifs_read_allocate_pages - allocate @nr_pages pages into rdata->pages.
 * On allocation failure, free everything allocated so far (all-or-nothing).
 */
2993 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2999 for (i = 0; i < nr_pages; i++) {
3000 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3005 rdata->pages[i] = page;
/* unwind: release the pages allocated before the failure */
3009 unsigned int nr_page_failed = i;
3011 for (i = 0; i < nr_page_failed; i++) {
3012 put_page(rdata->pages[i]);
3013 rdata->pages[i] = NULL;
/*
 * cifs_uncached_readdata_release - kref release for uncached reads: drop
 * the aio-ctx reference, free the bounce pages, then the common release.
 */
3020 cifs_uncached_readdata_release(struct kref *refcount)
3022 struct cifs_readdata *rdata = container_of(refcount,
3023 struct cifs_readdata, refcount);
3026 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3027 for (i = 0; i < rdata->nr_pages; i++) {
3028 put_page(rdata->pages[i]);
3029 rdata->pages[i] = NULL;
3031 cifs_readdata_release(refcount);
3035 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3036 * @rdata: the readdata response with list of pages holding data
3037 * @iter: destination for our data
3039 * This function copies data from a list of pages in a readdata response into
3040 * an array of iovecs. It will first calculate where the data should go
3041 * based on the info in the readdata and then copy the data into that spot.
3044 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3046 size_t remaining = rdata->got_bytes;
3049 for (i = 0; i < rdata->nr_pages; i++) {
3050 struct page *page = rdata->pages[i];
3051 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
/* pipe targets need a kmapped copy rather than copy_page_to_iter */
3054 if (unlikely(iter->type & ITER_PIPE)) {
3055 void *addr = kmap_atomic(page);
3057 written = copy_to_iter(addr, copy, iter);
3058 kunmap_atomic(addr);
3060 written = copy_page_to_iter(page, 0, copy, iter);
3061 remaining -= written;
/* short copy with space left in the iter means a fault occurred */
3062 if (written < copy && iov_iter_count(iter) > 0)
3065 return remaining ? -EFAULT : 0;
3068 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
/*
 * cifs_uncached_readv_complete - work item run when an uncached read
 * response arrives: signal completion and let the collector reap results.
 */
3071 cifs_uncached_readv_complete(struct work_struct *work)
3073 struct cifs_readdata *rdata = container_of(work,
3074 struct cifs_readdata, work);
3076 complete(&rdata->done);
3077 collect_uncached_read_data(rdata->ctx);
3078 /* the below call can possibly free the last ref to aio ctx */
3079 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/*
 * uncached_fill_pages - fill rdata->pages with @len bytes of response data,
 * either from @iter (already-decrypted/copied data) or directly from the
 * server socket when @iter is NULL.  Pages beyond the data are released.
 */
3083 uncached_fill_pages(struct TCP_Server_Info *server,
3084 struct cifs_readdata *rdata, struct iov_iter *iter,
3089 unsigned int nr_pages = rdata->nr_pages;
3091 rdata->got_bytes = 0;
3092 rdata->tailsz = PAGE_SIZE;
3093 for (i = 0; i < nr_pages; i++) {
3094 struct page *page = rdata->pages[i];
3098 /* no need to hold page hostage */
3099 rdata->pages[i] = NULL;
3105 if (len >= PAGE_SIZE) {
3106 /* enough data to fill the page */
/* partial last page: record its length as the tail size */
3110 zero_user(page, len, PAGE_SIZE - len);
3111 rdata->tailsz = len;
3115 result = copy_page_from_iter(page, 0, n, iter);
3117 result = cifs_read_page_from_socket(server, page, n);
3121 rdata->got_bytes += result;
/* report partial progress unless the connection was aborted */
3124 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3125 rdata->got_bytes : result;
/* Socket-read variant: pull @len bytes straight off the transport. */
3129 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3130 struct cifs_readdata *rdata, unsigned int len)
3132 return uncached_fill_pages(server, rdata, NULL, len);
/* Copy variant: consume pre-staged response data from @iter. */
3136 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3137 struct cifs_readdata *rdata,
3138 struct iov_iter *iter)
3140 return uncached_fill_pages(server, rdata, iter, iter->count);
/*
 * cifs_send_async_read - split [offset, offset+len) into rsize-bounded
 * async read requests, allocating pages and credits for each, and queue
 * the resulting readdata structures on @rdata_list.
 */
3144 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3145 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3146 struct cifs_aio_ctx *ctx)
3148 struct cifs_readdata *rdata;
3149 unsigned int npages, rsize, credits;
3153 struct TCP_Server_Info *server;
3155 server = tlink_tcon(open_file->tlink)->ses->server;
3157 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3158 pid = open_file->pid;
3160 pid = current->tgid;
/* reserve transport credits before sizing this request */
3163 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3168 cur_len = min_t(const size_t, len, rsize);
3169 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3171 /* allocate a readdata struct */
3172 rdata = cifs_readdata_alloc(npages,
3173 cifs_uncached_readv_complete);
3175 add_credits_and_wake_if(server, credits, 0);
3180 rc = cifs_read_allocate_pages(rdata, npages);
3184 rdata->cfile = cifsFileInfo_get(open_file);
3185 rdata->nr_pages = npages;
3186 rdata->offset = offset;
3187 rdata->bytes = cur_len;
3189 rdata->pagesz = PAGE_SIZE;
3190 rdata->read_into_pages = cifs_uncached_read_into_pages;
3191 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3192 rdata->credits = credits;
/* each in-flight request holds a reference on the aio context */
3194 kref_get(&ctx->refcount);
3196 if (!rdata->cfile->invalidHandle ||
3197 !(rc = cifs_reopen_file(rdata->cfile, true)))
3198 rc = server->ops->async_readv(rdata);
/* send failed: return credits and drop the request */
3201 add_credits_and_wake_if(server, rdata->credits, 0);
3202 kref_put(&rdata->refcount,
3203 cifs_uncached_readdata_release);
3209 list_add_tail(&rdata->list, rdata_list);
/*
 * collect_uncached_read_data - reap completed uncached reads on ctx->list,
 * copying received data into the destination iov_iter, resending -EAGAIN
 * failures, and finally reporting the total via the aio context.
 */
3218 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3220 struct cifs_readdata *rdata, *tmp;
3221 struct iov_iter *to = &ctx->iter;
3222 struct cifs_sb_info *cifs_sb;
3223 struct cifs_tcon *tcon;
3227 tcon = tlink_tcon(ctx->cfile->tlink);
3228 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3230 mutex_lock(&ctx->aio_mutex);
3232 if (list_empty(&ctx->list)) {
3233 mutex_unlock(&ctx->aio_mutex);
3238 /* the loop below should proceed in the order of increasing offsets */
3240 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3242 if (!try_wait_for_completion(&rdata->done)) {
3243 mutex_unlock(&ctx->aio_mutex);
3247 if (rdata->result == -EAGAIN) {
3248 /* resend call if it's a retryable error */
3249 struct list_head tmp_list;
3250 unsigned int got_bytes = rdata->got_bytes;
3252 list_del_init(&rdata->list);
3253 INIT_LIST_HEAD(&tmp_list);
3256 * Got a part of data and then reconnect has
3257 * happened -- fill the buffer and continue
3260 if (got_bytes && got_bytes < rdata->bytes) {
3261 rc = cifs_readdata_to_iov(rdata, to);
3263 kref_put(&rdata->refcount,
3264 cifs_uncached_readdata_release);
/* resend only the remainder that was not received */
3269 rc = cifs_send_async_read(
3270 rdata->offset + got_bytes,
3271 rdata->bytes - got_bytes,
3272 rdata->cfile, cifs_sb,
3275 list_splice(&tmp_list, &ctx->list);
3277 kref_put(&rdata->refcount,
3278 cifs_uncached_readdata_release);
3280 } else if (rdata->result)
3283 rc = cifs_readdata_to_iov(rdata, to);
3285 /* if there was a short read -- discard anything left */
3286 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3289 list_del_init(&rdata->list);
3290 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
/* release pinned user pages, dirtying them if data was written */
3293 for (i = 0; i < ctx->npages; i++) {
3294 if (ctx->should_dirty)
3295 set_page_dirty(ctx->bv[i].bv_page);
3296 put_page(ctx->bv[i].bv_page);
3299 ctx->total_len = ctx->len - iov_iter_count(to);
3301 cifs_stats_bytes_read(tcon, ctx->total_len);
3303 /* mask nodata case */
3307 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3309 mutex_unlock(&ctx->aio_mutex);
/* async caller: complete the iocb; sync caller: wake the waiter */
3311 if (ctx->iocb && ctx->iocb->ki_complete)
3312 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3314 complete(&ctx->done);
/*
 * cifs_user_readv - uncached read path: pin the destination iovec in an
 * aio context, issue async read requests, then either return -EIOCBQUEUED
 * (async iocb) or wait for completion and return the bytes read.
 */
3317 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3319 struct file *file = iocb->ki_filp;
3322 ssize_t total_read = 0;
3323 loff_t offset = iocb->ki_pos;
3324 struct cifs_sb_info *cifs_sb;
3325 struct cifs_tcon *tcon;
3326 struct cifsFileInfo *cfile;
3327 struct cifs_aio_ctx *ctx;
3329 len = iov_iter_count(to);
3333 cifs_sb = CIFS_FILE_SB(file);
3334 cfile = file->private_data;
3335 tcon = tlink_tcon(cfile->tlink);
/* this path requires async read support from the SMB dialect */
3337 if (!tcon->ses->server->ops->async_readv)
3340 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3341 cifs_dbg(FYI, "attempting read on write only file instance\n");
3343 ctx = cifs_aio_ctx_alloc();
3347 ctx->cfile = cifsFileInfo_get(cfile);
3349 if (!is_sync_kiocb(iocb))
/* user-backed iovec pages must be dirtied after we fill them */
3352 if (to->type == ITER_IOVEC)
3353 ctx->should_dirty = true;
3355 rc = setup_aio_ctx_iter(ctx, to, READ);
3357 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3363 /* grab a lock here because read response handlers can access ctx */
3364 mutex_lock(&ctx->aio_mutex);
3366 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3368 /* if at least one read request send succeeded, then reset rc */
3369 if (!list_empty(&ctx->list))
3372 mutex_unlock(&ctx->aio_mutex);
3375 kref_put(&ctx->refcount, cifs_aio_ctx_release);
/* async iocb: completion will be reported via ki_complete */
3379 if (!is_sync_kiocb(iocb)) {
3380 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3381 return -EIOCBQUEUED;
3384 rc = wait_for_completion_killable(&ctx->done);
/* interrupted: record -EINTR but still report any partial progress */
3386 mutex_lock(&ctx->aio_mutex);
3387 ctx->rc = rc = -EINTR;
3388 total_read = ctx->total_len;
3389 mutex_unlock(&ctx->aio_mutex);
3392 total_read = ctx->total_len;
3395 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3398 iocb->ki_pos += total_read;
/*
 * cifs_strict_readv - strict cache mode read dispatcher: read through the
 * page cache only when we hold a read oplock/lease and no conflicting
 * mandatory locks exist; otherwise fall back to the uncached path.
 */
3405 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3407 struct inode *inode = file_inode(iocb->ki_filp);
3408 struct cifsInodeInfo *cinode = CIFS_I(inode);
3409 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3410 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3411 iocb->ki_filp->private_data;
3412 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3416 * In strict cache mode we need to read from the server all the time
3417 * if we don't have level II oplock because the server can delay mtime
3418 * change - so we can't make a decision about inode invalidating.
3419 * And we can also fail with pagereading if there are mandatory locks
3420 * on pages affected by this read but not on the region from pos to
3423 if (!CIFS_CACHE_READ(cinode))
3424 return cifs_user_readv(iocb, to);
/* POSIX byte-range locks don't conflict with cached reads */
3426 if (cap_unix(tcon->ses) &&
3427 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3428 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3429 return generic_file_read_iter(iocb, to);
3432 * We need to hold the sem to be sure nobody modifies lock list
3433 * with a brlock that prevents reading.
3435 down_read(&cinode->lock_sem);
3436 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3437 tcon->ses->server->vals->shared_lock_type,
3438 NULL, CIFS_READ_OP))
3439 rc = generic_file_read_iter(iocb, to);
3440 up_read(&cinode->lock_sem);
/*
 * cifs_read - synchronous read helper used by the readpage path: issue
 * rsize-bounded sync_read calls until @read_size bytes are read, EOF is
 * hit, or an error occurs.  Advances *offset by the bytes read.
 */
3445 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3448 unsigned int bytes_read = 0;
3449 unsigned int total_read;
3450 unsigned int current_read_size;
3452 struct cifs_sb_info *cifs_sb;
3453 struct cifs_tcon *tcon;
3454 struct TCP_Server_Info *server;
3457 struct cifsFileInfo *open_file;
3458 struct cifs_io_parms io_parms;
3459 int buf_type = CIFS_NO_BUFFER;
3463 cifs_sb = CIFS_FILE_SB(file);
3465 /* FIXME: set up handlers for larger reads and/or convert to async */
3466 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3468 if (file->private_data == NULL) {
3473 open_file = file->private_data;
3474 tcon = tlink_tcon(open_file->tlink);
3475 server = tcon->ses->server;
3477 if (!server->ops->sync_read) {
3482 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3483 pid = open_file->pid;
3485 pid = current->tgid;
3487 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3488 cifs_dbg(FYI, "attempting read on write only file instance\n")
3490 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3491 total_read += bytes_read, cur_offset += bytes_read) {
3493 current_read_size = min_t(uint, read_size - total_read,
3496 * For windows me and 9x we do not want to request more
3497 * than it negotiated since it will refuse the read
3500 if (!(tcon->ses->capabilities &
3501 tcon->ses->server->vals->cap_large_files)) {
3502 current_read_size = min_t(uint,
3503 current_read_size, CIFSMaxBufSize);
/* retry once after reopening a stale handle */
3505 if (open_file->invalidHandle) {
3506 rc = cifs_reopen_file(open_file, true);
3511 io_parms.tcon = tcon;
3512 io_parms.offset = *offset;
3513 io_parms.length = current_read_size;
3514 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3515 &bytes_read, &cur_offset,
3517 } while (rc == -EAGAIN);
/* stop on error or EOF (zero-byte read) */
3519 if (rc || (bytes_read == 0)) {
3527 cifs_stats_bytes_read(tcon, total_read);
3528 *offset += bytes_read;
3536 * If the page is mmap'ed into a process' page tables, then we need to make
3537 * sure that it doesn't change while being written back.
3540 cifs_page_mkwrite(struct vm_fault *vmf)
3542 struct page *page = vmf->page;
/* returning locked keeps the page stable during writeback */
3545 return VM_FAULT_LOCKED;
/* VM operations for mmap'ed CIFS files: generic faulting plus our
 * page_mkwrite hook to pin pages during writeback. */
3548 static const struct vm_operations_struct cifs_file_vm_ops = {
3549 .fault = filemap_fault,
3550 .map_pages = filemap_map_pages,
3551 .page_mkwrite = cifs_page_mkwrite,
/*
 * cifs_file_strict_mmap - mmap for strict cache mode: zap any stale
 * cached pages first when we lack a read oplock, then do a generic mmap.
 */
3554 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3557 struct inode *inode = file_inode(file);
3561 if (!CIFS_CACHE_READ(CIFS_I(inode)))
3562 rc = cifs_zap_mapping(inode);
3564 rc = generic_file_mmap(file, vma);
3566 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_file_mmap - default mmap: revalidate the file's cached data
 * against the server before mapping it.
 */
3572 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3578 rc = cifs_revalidate_file(file);
3580 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3583 rc = generic_file_mmap(file, vma);
3585 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_readv_complete - work item for cached (readpages) reads: mark the
 * pages that received data uptodate, feed them to fscache, unlock them,
 * and drop the request's reference.
 */
3592 cifs_readv_complete(struct work_struct *work)
3594 unsigned int i, got_bytes;
3595 struct cifs_readdata *rdata = container_of(work,
3596 struct cifs_readdata, work);
3598 got_bytes = rdata->got_bytes;
3599 for (i = 0; i < rdata->nr_pages; i++) {
3600 struct page *page = rdata->pages[i];
3602 lru_cache_add_file(page);
/* a page is good if the read succeeded, or partially succeeded */
3604 if (rdata->result == 0 ||
3605 (rdata->result == -EAGAIN && got_bytes)) {
3606 flush_dcache_page(page);
3607 SetPageUptodate(page);
3612 if (rdata->result == 0 ||
3613 (rdata->result == -EAGAIN && got_bytes))
3614 cifs_readpage_to_fscache(rdata->mapping->host, page);
3616 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3619 rdata->pages[i] = NULL;
3621 kref_put(&rdata->refcount, cifs_readdata_release);
/*
 * readpages_fill_pages - fill page-cache pages for a readpages request,
 * either from @iter or straight from the server socket (@iter == NULL).
 * Pages beyond the data or past the server's EOF are zeroed/released.
 */
3625 readpages_fill_pages(struct TCP_Server_Info *server,
3626 struct cifs_readdata *rdata, struct iov_iter *iter,
3633 unsigned int nr_pages = rdata->nr_pages;
3635 /* determine the eof that the server (probably) has */
3636 eof = CIFS_I(rdata->mapping->host)->server_eof;
3637 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3638 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3640 rdata->got_bytes = 0;
3641 rdata->tailsz = PAGE_SIZE;
3642 for (i = 0; i < nr_pages; i++) {
3643 struct page *page = rdata->pages[i];
3644 size_t n = PAGE_SIZE;
3646 if (len >= PAGE_SIZE) {
3648 } else if (len > 0) {
3649 /* enough for partial page, fill and zero the rest */
3650 zero_user(page, len, PAGE_SIZE - len);
3651 n = rdata->tailsz = len;
3653 } else if (page->index > eof_index) {
3655 * The VFS will not try to do readahead past the
3656 * i_size, but it's possible that we have outstanding
3657 * writes with gaps in the middle and the i_size hasn't
3658 * caught up yet. Populate those with zeroed out pages
3659 * to prevent the VFS from repeatedly attempting to
3660 * fill them until the writes are flushed.
3662 zero_user(page, 0, PAGE_SIZE);
3663 lru_cache_add_file(page);
3664 flush_dcache_page(page);
3665 SetPageUptodate(page);
3668 rdata->pages[i] = NULL;
3672 /* no need to hold page hostage */
3673 lru_cache_add_file(page);
3676 rdata->pages[i] = NULL;
3682 result = copy_page_from_iter(page, 0, n, iter);
3684 result = cifs_read_page_from_socket(server, page, n);
3688 rdata->got_bytes += result;
/* report partial progress unless the connection was aborted */
3691 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3692 rdata->got_bytes : result;
/* Socket-read variant for the readpages path. */
3696 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3697 struct cifs_readdata *rdata, unsigned int len)
3699 return readpages_fill_pages(server, rdata, NULL, len);
/* Copy variant: consume pre-staged response data from @iter. */
3703 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3704 struct cifs_readdata *rdata,
3705 struct iov_iter *iter)
3707 return readpages_fill_pages(server, rdata, iter, iter->count);
/*
 * readpages_get_pages - peel a run of index-contiguous pages off the tail
 * of @page_list (bounded by @rsize bytes), insert them into the page
 * cache locked, and move them to @tmplist for a single read request.
 * Returns the request's starting @offset, page count and byte count.
 */
3711 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3712 unsigned int rsize, struct list_head *tmplist,
3713 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3715 struct page *page, *tpage;
3716 unsigned int expected_index;
3718 gfp_t gfp = readahead_gfp_mask(mapping);
3720 INIT_LIST_HEAD(tmplist);
3722 page = list_entry(page_list->prev, struct page, lru);
3725 * Lock the page and put it in the cache. Since no one else
3726 * should have access to this page, we're safe to simply set
3727 * PG_locked without checking it first.
3729 __SetPageLocked(page);
3730 rc = add_to_page_cache_locked(page, mapping,
3733 /* give up if we can't stick it in the cache */
3735 __ClearPageLocked(page);
3739 /* move first page to the tmplist */
3740 *offset = (loff_t)page->index << PAGE_SHIFT;
3743 list_move_tail(&page->lru, tmplist);
3745 /* now try and add more pages onto the request */
3746 expected_index = page->index + 1;
3747 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3748 /* discontinuity ? */
3749 if (page->index != expected_index)
3752 /* would this page push the read over the rsize? */
3753 if (*bytes + PAGE_SIZE > rsize)
3756 __SetPageLocked(page);
3757 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
3759 __ClearPageLocked(page);
3762 list_move_tail(&page->lru, tmplist);
3763 (*bytes) += PAGE_SIZE;
/*
 * cifs_readpages - address_space readpages: batch contiguous pages into
 * rsize-bounded async read requests, after first giving fscache a chance
 * to satisfy them.
 */
3770 static int cifs_readpages(struct file *file, struct address_space *mapping,
3771 struct list_head *page_list, unsigned num_pages)
3775 struct list_head tmplist;
3776 struct cifsFileInfo *open_file = file->private_data;
3777 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3778 struct TCP_Server_Info *server;
3782 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3783 * immediately if the cookie is negative
3785 * After this point, every page in the list might have PG_fscache set,
3786 * so we will need to clean that up off of every page we don't use.
3788 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3793 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3794 pid = open_file->pid;
3796 pid = current->tgid;
3799 server = tlink_tcon(open_file->tlink)->ses->server;
3801 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3802 __func__, file, mapping, num_pages);
3805 * Start with the page at end of list and move it to private
3806 * list. Do the same with any following pages until we hit
3807 * the rsize limit, hit an index discontinuity, or run out of
3808 * pages. Issue the async read and then start the loop again
3809 * until the list is empty.
3811 * Note that list order is important. The page_list is in
3812 * the order of declining indexes. When we put the pages in
3813 * the rdata->pages, then we want them in increasing order.
3815 while (!list_empty(page_list) && !err) {
3816 unsigned int i, nr_pages, bytes, rsize;
3818 struct page *page, *tpage;
3819 struct cifs_readdata *rdata;
/* reserve transport credits before sizing this request */
3822 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3828 * Give up immediately if rsize is too small to read an entire
3829 * page. The VFS will fall back to readpage. We should never
3830 * reach this point however since we set ra_pages to 0 when the
3831 * rsize is smaller than a cache page.
3833 if (unlikely(rsize < PAGE_SIZE)) {
3834 add_credits_and_wake_if(server, credits, 0);
3839 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3840 &nr_pages, &offset, &bytes);
3842 add_credits_and_wake_if(server, credits, 0);
3846 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3848 /* best to give up if we're out of mem */
3849 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3850 list_del(&page->lru);
3851 lru_cache_add_file(page);
3856 add_credits_and_wake_if(server, credits, 0);
3860 rdata->cfile = cifsFileInfo_get(open_file);
3861 rdata->mapping = mapping;
3862 rdata->offset = offset;
3863 rdata->bytes = bytes;
3865 rdata->pagesz = PAGE_SIZE;
3866 rdata->read_into_pages = cifs_readpages_read_into_pages;
3867 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
3868 rdata->credits = credits;
3870 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3871 list_del(&page->lru);
3872 rdata->pages[rdata->nr_pages++] = page;
/* reopen a stale handle before sending, then issue the read */
3875 if (!rdata->cfile->invalidHandle ||
3876 !(rc = cifs_reopen_file(rdata->cfile, true)))
3877 rc = server->ops->async_readv(rdata);
/* send failed: return credits and release the pages */
3879 add_credits_and_wake_if(server, rdata->credits, 0);
3880 for (i = 0; i < rdata->nr_pages; i++) {
3881 page = rdata->pages[i];
3882 lru_cache_add_file(page);
3886 /* Fallback to the readpage in error/reconnect cases */
3887 kref_put(&rdata->refcount, cifs_readdata_release);
3891 kref_put(&rdata->refcount, cifs_readdata_release);
3894 /* Any pages that have been shown to fscache but didn't get added to
3895 * the pagecache must be uncached before they get returned to the
3898 cifs_fscache_readpages_cancel(mapping->host, page_list);
3903 * cifs_readpage_worker must be called with the page pinned
3905 static int cifs_readpage_worker(struct file *file, struct page *page,
3911 /* Is the page cached? */
3912 rc = cifs_readpage_from_fscache(file_inode(file), page);
3916 read_data = kmap(page);
3917 /* for reads over a certain size could initiate async read ahead */
3919 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3924 cifs_dbg(FYI, "Bytes read %d\n", rc);
3926 file_inode(file)->i_atime =
3927 current_time(file_inode(file));
/* zero-fill the remainder of a short read */
3930 memset(read_data + rc, 0, PAGE_SIZE - rc);
3932 flush_dcache_page(page);
3933 SetPageUptodate(page);
3935 /* send this page to the cache */
3936 cifs_readpage_to_fscache(file_inode(file), page);
/*
 * cifs_readpage - address_space readpage: synchronously read one page
 * via cifs_readpage_worker().
 */
3948 static int cifs_readpage(struct file *file, struct page *page)
3950 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3956 if (file->private_data == NULL) {
3962 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3963 page, (int)offset, (int)offset);
3965 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - true if any open file handle on this inode has
 * write mode.  Walks the inode's open file list under open_file_lock.
 */
3971 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3973 struct cifsFileInfo *open_file;
3974 struct cifs_tcon *tcon =
3975 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3977 spin_lock(&tcon->open_file_lock);
3978 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3979 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3980 spin_unlock(&tcon->open_file_lock);
3984 spin_unlock(&tcon->open_file_lock);
3988 /* We do not want to update the file size from server for inodes
3989 open for write - to avoid races with writepage extending
3990 the file - in the future we could consider allowing
3991 refreshing the inode only on increases in the file size
3992 but this is tricky to do without racing with writebehind
3993 page caching in the current Linux kernel design */
3994 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3999 if (is_inode_writable(cifsInode)) {
4000 /* This inode is open for write at least once */
4001 struct cifs_sb_info *cifs_sb;
4003 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4004 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4005 /* since no page cache to corrupt on directio
4006 we can change size safely */
/* growing past the cached i_size cannot clobber dirty pages */
4010 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - address_space write_begin: grab (and, when needed,
 * pre-read or pre-zero) the page that will receive @len bytes at @pos.
 */
4018 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4019 loff_t pos, unsigned len, unsigned flags,
4020 struct page **pagep, void **fsdata)
4023 pgoff_t index = pos >> PAGE_SHIFT;
4024 loff_t offset = pos & (PAGE_SIZE - 1);
4025 loff_t page_start = pos & PAGE_MASK;
4030 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4033 page = grab_cache_page_write_begin(mapping, index, flags);
4039 if (PageUptodate(page))
4043 * If we write a full page it will be up to date, no need to read from
4044 * the server. If the write is short, we'll end up doing a sync write
4047 if (len == PAGE_SIZE)
4051 * optimize away the read when we have an oplock, and we're not
4052 * expecting to use any of the data we'd be reading in. That
4053 * is, when the page lies beyond the EOF, or straddles the EOF
4054 * and the write will cover all of the existing data.
4056 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4057 i_size = i_size_read(mapping->host);
4058 if (page_start >= i_size ||
4059 (offset == 0 && (pos + len) >= i_size)) {
4060 zero_user_segments(page, 0, offset,
4064 * PageChecked means that the parts of the page
4065 * to which we're not writing are considered up
4066 * to date. Once the data is copied to the
4067 * page, it can be set uptodate.
4069 SetPageChecked(page);
4074 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4076 * might as well read a page, it is fast enough. If we get
4077 * an error, we don't need to return it. cifs_write_end will
4078 * do a sync write instead since PG_uptodate isn't set.
4080 cifs_readpage_worker(file, page, &page_start);
4085 /* we could try using another file handle if there is one -
4086 but how would we lock it to prevent close of that handle
4087 racing with this read? In any case
4088 this will be written out by write_end so is fine */
/* releasepage: refuse pages with private data; else defer to fscache. */
4095 static int cifs_release_page(struct page *page, gfp_t gfp)
4097 if (PagePrivate(page))
4100 return cifs_fscache_release_page(page, gfp);
/* invalidatepage: only tell fscache when the whole page is invalidated. */
4103 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4104 unsigned int length)
4106 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4108 if (offset == 0 && length == PAGE_SIZE)
4109 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_launder_page - write back a single dirty page synchronously before
 * it is invalidated, then drop it from fscache.
 */
4112 static int cifs_launder_page(struct page *page)
4115 loff_t range_start = page_offset(page);
4116 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4117 struct writeback_control wbc = {
4118 .sync_mode = WB_SYNC_ALL,
4120 .range_start = range_start,
4121 .range_end = range_end,
4124 cifs_dbg(FYI, "Launder page: %p\n", page);
4126 if (clear_page_dirty_for_io(page))
4127 rc = cifs_writepage_locked(page, &wbc);
4129 cifs_fscache_invalidate_page(page, page->mapping->host);
4133 void cifs_oplock_break(struct work_struct *work)
4135 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4137 struct inode *inode = d_inode(cfile->dentry);
4138 struct cifsInodeInfo *cinode = CIFS_I(inode);
4139 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4140 struct TCP_Server_Info *server = tcon->ses->server;
4142 bool purge_cache = false;
4144 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4145 TASK_UNINTERRUPTIBLE);
4147 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4148 cfile->oplock_epoch, &purge_cache);
4150 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4151 cifs_has_mand_locks(cinode)) {
4152 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4157 if (inode && S_ISREG(inode->i_mode)) {
4158 if (CIFS_CACHE_READ(cinode))
4159 break_lease(inode, O_RDONLY);
4161 break_lease(inode, O_WRONLY);
4162 rc = filemap_fdatawrite(inode->i_mapping);
4163 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4164 rc = filemap_fdatawait(inode->i_mapping);
4165 mapping_set_error(inode->i_mapping, rc);
4166 cifs_zap_mapping(inode);
4168 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4169 if (CIFS_CACHE_WRITE(cinode))
4170 goto oplock_break_ack;
4173 rc = cifs_push_locks(cfile);
4175 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4179 * releasing stale oplock after recent reconnect of smb session using
4180 * a now incorrect file handle is not a data integrity issue but do
4181 * not bother sending an oplock release if session to server still is
4182 * disconnected since oplock already released by the server
4184 if (!cfile->oplock_break_cancelled) {
4185 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4187 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4189 _cifsFileInfo_put(cfile, false /* do not wait for ourself */);
4190 cifs_done_oplock_break(cinode);
4194 * The presence of cifs_direct_io() in the address space ops vector
4195 * allowes open() O_DIRECT flags which would have failed otherwise.
4197 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4198 * so this method should never be called.
4200 * Direct IO is not yet supported in the cached mode.
4203 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4207 * Eventually need to support direct IO for non forcedirectio mounts
4213 const struct address_space_operations cifs_addr_ops = {
4214 .readpage = cifs_readpage,
4215 .readpages = cifs_readpages,
4216 .writepage = cifs_writepage,
4217 .writepages = cifs_writepages,
4218 .write_begin = cifs_write_begin,
4219 .write_end = cifs_write_end,
4220 .set_page_dirty = __set_page_dirty_nobuffers,
4221 .releasepage = cifs_release_page,
4222 .direct_IO = cifs_direct_io,
4223 .invalidatepage = cifs_invalidate_page,
4224 .launder_page = cifs_launder_page,
4228 * cifs_readpages requires the server to support a buffer large enough to
4229 * contain the header plus one complete page of data. Otherwise, we need
4230 * to leave cifs_readpages out of the address space operations.
4232 const struct address_space_operations cifs_addr_ops_smallbuf = {
4233 .readpage = cifs_readpage,
4234 .writepage = cifs_writepage,
4235 .writepages = cifs_writepages,
4236 .write_begin = cifs_write_begin,
4237 .write_end = cifs_write_end,
4238 .set_page_dirty = __set_page_dirty_nobuffers,
4239 .releasepage = cifs_release_page,
4240 .invalidatepage = cifs_invalidate_page,
4241 .launder_page = cifs_launder_page,