e64cda7a761012563a82dc2afbd2582e01ea2607
[releases.git] / file.c
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/backing-dev.h>
13 #include <linux/stat.h>
14 #include <linux/fcntl.h>
15 #include <linux/pagemap.h>
16 #include <linux/pagevec.h>
17 #include <linux/writeback.h>
18 #include <linux/task_io_accounting_ops.h>
19 #include <linux/delay.h>
20 #include <linux/mount.h>
21 #include <linux/slab.h>
22 #include <linux/swap.h>
23 #include <linux/mm.h>
24 #include <asm/div64.h>
25 #include "cifsfs.h"
26 #include "cifspdu.h"
27 #include "cifsglob.h"
28 #include "cifsproto.h"
29 #include "cifs_unicode.h"
30 #include "cifs_debug.h"
31 #include "cifs_fs_sb.h"
32 #include "fscache.h"
33 #include "smbdirect.h"
34 #include "fs_context.h"
35 #include "cifs_ioctl.h"
36
37 static inline int cifs_convert_flags(unsigned int flags)
38 {
39         if ((flags & O_ACCMODE) == O_RDONLY)
40                 return GENERIC_READ;
41         else if ((flags & O_ACCMODE) == O_WRONLY)
42                 return GENERIC_WRITE;
43         else if ((flags & O_ACCMODE) == O_RDWR) {
44                 /* GENERIC_ALL is too much permission to request
45                    can cause unnecessary access denied on create */
46                 /* return GENERIC_ALL; */
47                 return (GENERIC_READ | GENERIC_WRITE);
48         }
49
50         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
51                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
52                 FILE_READ_DATA);
53 }
54
55 static u32 cifs_posix_convert_flags(unsigned int flags)
56 {
57         u32 posix_flags = 0;
58
59         if ((flags & O_ACCMODE) == O_RDONLY)
60                 posix_flags = SMB_O_RDONLY;
61         else if ((flags & O_ACCMODE) == O_WRONLY)
62                 posix_flags = SMB_O_WRONLY;
63         else if ((flags & O_ACCMODE) == O_RDWR)
64                 posix_flags = SMB_O_RDWR;
65
66         if (flags & O_CREAT) {
67                 posix_flags |= SMB_O_CREAT;
68                 if (flags & O_EXCL)
69                         posix_flags |= SMB_O_EXCL;
70         } else if (flags & O_EXCL)
71                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
72                          current->comm, current->tgid);
73
74         if (flags & O_TRUNC)
75                 posix_flags |= SMB_O_TRUNC;
76         /* be safe and imply O_SYNC for O_DSYNC */
77         if (flags & O_DSYNC)
78                 posix_flags |= SMB_O_SYNC;
79         if (flags & O_DIRECTORY)
80                 posix_flags |= SMB_O_DIRECTORY;
81         if (flags & O_NOFOLLOW)
82                 posix_flags |= SMB_O_NOFOLLOW;
83         if (flags & O_DIRECT)
84                 posix_flags |= SMB_O_DIRECT;
85
86         return posix_flags;
87 }
88
89 static inline int cifs_get_disposition(unsigned int flags)
90 {
91         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
92                 return FILE_CREATE;
93         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
94                 return FILE_OVERWRITE_IF;
95         else if ((flags & O_CREAT) == O_CREAT)
96                 return FILE_OPEN_IF;
97         else if ((flags & O_TRUNC) == O_TRUNC)
98                 return FILE_OVERWRITE;
99         else
100                 return FILE_OPEN;
101 }
102
103 int cifs_posix_open(const char *full_path, struct inode **pinode,
104                         struct super_block *sb, int mode, unsigned int f_flags,
105                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
106 {
107         int rc;
108         FILE_UNIX_BASIC_INFO *presp_data;
109         __u32 posix_flags = 0;
110         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
111         struct cifs_fattr fattr;
112         struct tcon_link *tlink;
113         struct cifs_tcon *tcon;
114
115         cifs_dbg(FYI, "posix open %s\n", full_path);
116
117         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
118         if (presp_data == NULL)
119                 return -ENOMEM;
120
121         tlink = cifs_sb_tlink(cifs_sb);
122         if (IS_ERR(tlink)) {
123                 rc = PTR_ERR(tlink);
124                 goto posix_open_ret;
125         }
126
127         tcon = tlink_tcon(tlink);
128         mode &= ~current_umask();
129
130         posix_flags = cifs_posix_convert_flags(f_flags);
131         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
132                              poplock, full_path, cifs_sb->local_nls,
133                              cifs_remap(cifs_sb));
134         cifs_put_tlink(tlink);
135
136         if (rc)
137                 goto posix_open_ret;
138
139         if (presp_data->Type == cpu_to_le32(-1))
140                 goto posix_open_ret; /* open ok, caller does qpathinfo */
141
142         if (!pinode)
143                 goto posix_open_ret; /* caller does not need info */
144
145         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
146
147         /* get new inode and set it up */
148         if (*pinode == NULL) {
149                 cifs_fill_uniqueid(sb, &fattr);
150                 *pinode = cifs_iget(sb, &fattr);
151                 if (!*pinode) {
152                         rc = -ENOMEM;
153                         goto posix_open_ret;
154                 }
155         } else {
156                 cifs_revalidate_mapping(*pinode);
157                 rc = cifs_fattr_to_inode(*pinode, &fattr);
158         }
159
160 posix_open_ret:
161         kfree(presp_data);
162         return rc;
163 }
164
165 static int
166 cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
167              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
168              struct cifs_fid *fid, unsigned int xid)
169 {
170         int rc;
171         int desired_access;
172         int disposition;
173         int create_options = CREATE_NOT_DIR;
174         FILE_ALL_INFO *buf;
175         struct TCP_Server_Info *server = tcon->ses->server;
176         struct cifs_open_parms oparms;
177
178         if (!server->ops->open)
179                 return -ENOSYS;
180
181         desired_access = cifs_convert_flags(f_flags);
182
183 /*********************************************************************
184  *  open flag mapping table:
185  *
186  *      POSIX Flag            CIFS Disposition
187  *      ----------            ----------------
188  *      O_CREAT               FILE_OPEN_IF
189  *      O_CREAT | O_EXCL      FILE_CREATE
190  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
191  *      O_TRUNC               FILE_OVERWRITE
192  *      none of the above     FILE_OPEN
193  *
194  *      Note that there is not a direct match between disposition
195  *      FILE_SUPERSEDE (ie create whether or not file exists although
196  *      O_CREAT | O_TRUNC is similar but truncates the existing
197  *      file rather than creating a new file as FILE_SUPERSEDE does
198  *      (which uses the attributes / metadata passed in on open call)
199  *?
200  *?  O_SYNC is a reasonable match to CIFS writethrough flag
201  *?  and the read write flags match reasonably.  O_LARGEFILE
202  *?  is irrelevant because largefile support is always used
203  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205  *********************************************************************/
206
207         disposition = cifs_get_disposition(f_flags);
208
209         /* BB pass O_SYNC flag through on file attributes .. BB */
210
211         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
212         if (!buf)
213                 return -ENOMEM;
214
215         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
216         if (f_flags & O_SYNC)
217                 create_options |= CREATE_WRITE_THROUGH;
218
219         if (f_flags & O_DIRECT)
220                 create_options |= CREATE_NO_BUFFER;
221
222         oparms.tcon = tcon;
223         oparms.cifs_sb = cifs_sb;
224         oparms.desired_access = desired_access;
225         oparms.create_options = cifs_create_options(cifs_sb, create_options);
226         oparms.disposition = disposition;
227         oparms.path = full_path;
228         oparms.fid = fid;
229         oparms.reconnect = false;
230
231         rc = server->ops->open(xid, &oparms, oplock, buf);
232
233         if (rc)
234                 goto out;
235
236         /* TODO: Add support for calling posix query info but with passing in fid */
237         if (tcon->unix_ext)
238                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
239                                               xid);
240         else
241                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
242                                          xid, fid);
243
244         if (rc) {
245                 server->ops->close(xid, tcon, fid);
246                 if (rc == -ESTALE)
247                         rc = -EOPENSTALE;
248         }
249
250 out:
251         kfree(buf);
252         return rc;
253 }
254
255 static bool
256 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
257 {
258         struct cifs_fid_locks *cur;
259         bool has_locks = false;
260
261         down_read(&cinode->lock_sem);
262         list_for_each_entry(cur, &cinode->llist, llist) {
263                 if (!list_empty(&cur->locks)) {
264                         has_locks = true;
265                         break;
266                 }
267         }
268         up_read(&cinode->lock_sem);
269         return has_locks;
270 }
271
/*
 * Take @sem for writing by polling: retry down_write_trylock() and sleep
 * 10 ms between attempts until it succeeds.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
278
279 static void cifsFileInfo_put_work(struct work_struct *work);
280
281 struct cifsFileInfo *
282 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
283                   struct tcon_link *tlink, __u32 oplock)
284 {
285         struct dentry *dentry = file_dentry(file);
286         struct inode *inode = d_inode(dentry);
287         struct cifsInodeInfo *cinode = CIFS_I(inode);
288         struct cifsFileInfo *cfile;
289         struct cifs_fid_locks *fdlocks;
290         struct cifs_tcon *tcon = tlink_tcon(tlink);
291         struct TCP_Server_Info *server = tcon->ses->server;
292
293         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
294         if (cfile == NULL)
295                 return cfile;
296
297         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
298         if (!fdlocks) {
299                 kfree(cfile);
300                 return NULL;
301         }
302
303         INIT_LIST_HEAD(&fdlocks->locks);
304         fdlocks->cfile = cfile;
305         cfile->llist = fdlocks;
306
307         cfile->count = 1;
308         cfile->pid = current->tgid;
309         cfile->uid = current_fsuid();
310         cfile->dentry = dget(dentry);
311         cfile->f_flags = file->f_flags;
312         cfile->invalidHandle = false;
313         cfile->deferred_close_scheduled = false;
314         cfile->tlink = cifs_get_tlink(tlink);
315         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
316         INIT_WORK(&cfile->put, cifsFileInfo_put_work);
317         INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
318         mutex_init(&cfile->fh_mutex);
319         spin_lock_init(&cfile->file_info_lock);
320
321         cifs_sb_active(inode->i_sb);
322
323         /*
324          * If the server returned a read oplock and we have mandatory brlocks,
325          * set oplock level to None.
326          */
327         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
328                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
329                 oplock = 0;
330         }
331
332         cifs_down_write(&cinode->lock_sem);
333         list_add(&fdlocks->llist, &cinode->llist);
334         up_write(&cinode->lock_sem);
335
336         spin_lock(&tcon->open_file_lock);
337         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
338                 oplock = fid->pending_open->oplock;
339         list_del(&fid->pending_open->olist);
340
341         fid->purge_cache = false;
342         server->ops->set_fid(cfile, fid, oplock);
343
344         list_add(&cfile->tlist, &tcon->openFileList);
345         atomic_inc(&tcon->num_local_opens);
346
347         /* if readable file instance put first in list*/
348         spin_lock(&cinode->open_file_lock);
349         if (file->f_mode & FMODE_READ)
350                 list_add(&cfile->flist, &cinode->openFileList);
351         else
352                 list_add_tail(&cfile->flist, &cinode->openFileList);
353         spin_unlock(&cinode->open_file_lock);
354         spin_unlock(&tcon->open_file_lock);
355
356         if (fid->purge_cache)
357                 cifs_zap_mapping(inode);
358
359         file->private_data = cfile;
360         return cfile;
361 }
362
363 struct cifsFileInfo *
364 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
365 {
366         spin_lock(&cifs_file->file_info_lock);
367         cifsFileInfo_get_locked(cifs_file);
368         spin_unlock(&cifs_file->file_info_lock);
369         return cifs_file;
370 }
371
372 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
373 {
374         struct inode *inode = d_inode(cifs_file->dentry);
375         struct cifsInodeInfo *cifsi = CIFS_I(inode);
376         struct cifsLockInfo *li, *tmp;
377         struct super_block *sb = inode->i_sb;
378
379         /*
380          * Delete any outstanding lock records. We'll lose them when the file
381          * is closed anyway.
382          */
383         cifs_down_write(&cifsi->lock_sem);
384         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
385                 list_del(&li->llist);
386                 cifs_del_lock_waiters(li);
387                 kfree(li);
388         }
389         list_del(&cifs_file->llist->llist);
390         kfree(cifs_file->llist);
391         up_write(&cifsi->lock_sem);
392
393         cifs_put_tlink(cifs_file->tlink);
394         dput(cifs_file->dentry);
395         cifs_sb_deactive(sb);
396         kfree(cifs_file);
397 }
398
399 static void cifsFileInfo_put_work(struct work_struct *work)
400 {
401         struct cifsFileInfo *cifs_file = container_of(work,
402                         struct cifsFileInfo, put);
403
404         cifsFileInfo_put_final(cifs_file);
405 }
406
407 /**
408  * cifsFileInfo_put - release a reference of file priv data
409  *
410  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
411  *
412  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
413  */
414 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
415 {
416         _cifsFileInfo_put(cifs_file, true, true);
417 }
418
419 /**
420  * _cifsFileInfo_put - release a reference of file priv data
421  *
422  * This may involve closing the filehandle @cifs_file out on the
423  * server. Must be called without holding tcon->open_file_lock,
424  * cinode->open_file_lock and cifs_file->file_info_lock.
425  *
426  * If @wait_for_oplock_handler is true and we are releasing the last
427  * reference, wait for any running oplock break handler of the file
428  * and cancel any pending one.
429  *
430  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
431  * @wait_oplock_handler: must be false if called from oplock_break_handler
432  * @offload:    not offloaded on close and oplock breaks
433  *
434  */
435 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
436                        bool wait_oplock_handler, bool offload)
437 {
438         struct inode *inode = d_inode(cifs_file->dentry);
439         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
440         struct TCP_Server_Info *server = tcon->ses->server;
441         struct cifsInodeInfo *cifsi = CIFS_I(inode);
442         struct super_block *sb = inode->i_sb;
443         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
444         struct cifs_fid fid;
445         struct cifs_pending_open open;
446         bool oplock_break_cancelled;
447
448         spin_lock(&tcon->open_file_lock);
449         spin_lock(&cifsi->open_file_lock);
450         spin_lock(&cifs_file->file_info_lock);
451         if (--cifs_file->count > 0) {
452                 spin_unlock(&cifs_file->file_info_lock);
453                 spin_unlock(&cifsi->open_file_lock);
454                 spin_unlock(&tcon->open_file_lock);
455                 return;
456         }
457         spin_unlock(&cifs_file->file_info_lock);
458
459         if (server->ops->get_lease_key)
460                 server->ops->get_lease_key(inode, &fid);
461
462         /* store open in pending opens to make sure we don't miss lease break */
463         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
464
465         /* remove it from the lists */
466         list_del(&cifs_file->flist);
467         list_del(&cifs_file->tlist);
468         atomic_dec(&tcon->num_local_opens);
469
470         if (list_empty(&cifsi->openFileList)) {
471                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
472                          d_inode(cifs_file->dentry));
473                 /*
474                  * In strict cache mode we need invalidate mapping on the last
475                  * close  because it may cause a error when we open this file
476                  * again and get at least level II oplock.
477                  */
478                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
479                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
480                 cifs_set_oplock_level(cifsi, 0);
481         }
482
483         spin_unlock(&cifsi->open_file_lock);
484         spin_unlock(&tcon->open_file_lock);
485
486         oplock_break_cancelled = wait_oplock_handler ?
487                 cancel_work_sync(&cifs_file->oplock_break) : false;
488
489         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
490                 struct TCP_Server_Info *server = tcon->ses->server;
491                 unsigned int xid;
492
493                 xid = get_xid();
494                 if (server->ops->close_getattr)
495                         server->ops->close_getattr(xid, tcon, cifs_file);
496                 else if (server->ops->close)
497                         server->ops->close(xid, tcon, &cifs_file->fid);
498                 _free_xid(xid);
499         }
500
501         if (oplock_break_cancelled)
502                 cifs_done_oplock_break(cifsi);
503
504         cifs_del_pending_open(&open);
505
506         if (offload)
507                 queue_work(fileinfo_put_wq, &cifs_file->put);
508         else
509                 cifsFileInfo_put_final(cifs_file);
510 }
511
512 int cifs_open(struct inode *inode, struct file *file)
513
514 {
515         int rc = -EACCES;
516         unsigned int xid;
517         __u32 oplock;
518         struct cifs_sb_info *cifs_sb;
519         struct TCP_Server_Info *server;
520         struct cifs_tcon *tcon;
521         struct tcon_link *tlink;
522         struct cifsFileInfo *cfile = NULL;
523         void *page;
524         const char *full_path;
525         bool posix_open_ok = false;
526         struct cifs_fid fid;
527         struct cifs_pending_open open;
528
529         xid = get_xid();
530
531         cifs_sb = CIFS_SB(inode->i_sb);
532         if (unlikely(cifs_forced_shutdown(cifs_sb))) {
533                 free_xid(xid);
534                 return -EIO;
535         }
536
537         tlink = cifs_sb_tlink(cifs_sb);
538         if (IS_ERR(tlink)) {
539                 free_xid(xid);
540                 return PTR_ERR(tlink);
541         }
542         tcon = tlink_tcon(tlink);
543         server = tcon->ses->server;
544
545         page = alloc_dentry_path();
546         full_path = build_path_from_dentry(file_dentry(file), page);
547         if (IS_ERR(full_path)) {
548                 rc = PTR_ERR(full_path);
549                 goto out;
550         }
551
552         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
553                  inode, file->f_flags, full_path);
554
555         if (file->f_flags & O_DIRECT &&
556             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
557                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
558                         file->f_op = &cifs_file_direct_nobrl_ops;
559                 else
560                         file->f_op = &cifs_file_direct_ops;
561         }
562
563         /* Get the cached handle as SMB2 close is deferred */
564         rc = cifs_get_readable_path(tcon, full_path, &cfile);
565         if (rc == 0) {
566                 if (file->f_flags == cfile->f_flags) {
567                         file->private_data = cfile;
568                         spin_lock(&CIFS_I(inode)->deferred_lock);
569                         cifs_del_deferred_close(cfile);
570                         spin_unlock(&CIFS_I(inode)->deferred_lock);
571                         goto use_cache;
572                 } else {
573                         _cifsFileInfo_put(cfile, true, false);
574                 }
575         }
576
577         if (server->oplocks)
578                 oplock = REQ_OPLOCK;
579         else
580                 oplock = 0;
581
582         if (!tcon->broken_posix_open && tcon->unix_ext &&
583             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
584                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
585                 /* can not refresh inode info since size could be stale */
586                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
587                                 cifs_sb->ctx->file_mode /* ignored */,
588                                 file->f_flags, &oplock, &fid.netfid, xid);
589                 if (rc == 0) {
590                         cifs_dbg(FYI, "posix open succeeded\n");
591                         posix_open_ok = true;
592                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
593                         if (tcon->ses->serverNOS)
594                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
595                                          tcon->ses->ip_addr,
596                                          tcon->ses->serverNOS);
597                         tcon->broken_posix_open = true;
598                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
599                          (rc != -EOPNOTSUPP)) /* path not found or net err */
600                         goto out;
601                 /*
602                  * Else fallthrough to retry open the old way on network i/o
603                  * or DFS errors.
604                  */
605         }
606
607         if (server->ops->get_lease_key)
608                 server->ops->get_lease_key(inode, &fid);
609
610         cifs_add_pending_open(&fid, tlink, &open);
611
612         if (!posix_open_ok) {
613                 if (server->ops->get_lease_key)
614                         server->ops->get_lease_key(inode, &fid);
615
616                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
617                                   file->f_flags, &oplock, &fid, xid);
618                 if (rc) {
619                         cifs_del_pending_open(&open);
620                         goto out;
621                 }
622         }
623
624         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
625         if (cfile == NULL) {
626                 if (server->ops->close)
627                         server->ops->close(xid, tcon, &fid);
628                 cifs_del_pending_open(&open);
629                 rc = -ENOMEM;
630                 goto out;
631         }
632
633         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
634                 /*
635                  * Time to set mode which we can not set earlier due to
636                  * problems creating new read-only files.
637                  */
638                 struct cifs_unix_set_info_args args = {
639                         .mode   = inode->i_mode,
640                         .uid    = INVALID_UID, /* no change */
641                         .gid    = INVALID_GID, /* no change */
642                         .ctime  = NO_CHANGE_64,
643                         .atime  = NO_CHANGE_64,
644                         .mtime  = NO_CHANGE_64,
645                         .device = 0,
646                 };
647                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
648                                        cfile->pid);
649         }
650
651 use_cache:
652         fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
653                            file->f_mode & FMODE_WRITE);
654         if (file->f_flags & O_DIRECT &&
655             (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
656              file->f_flags & O_APPEND))
657                 cifs_invalidate_cache(file_inode(file),
658                                       FSCACHE_INVAL_DIO_WRITE);
659
660 out:
661         free_dentry_path(page);
662         free_xid(xid);
663         cifs_put_tlink(tlink);
664         return rc;
665 }
666
667 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
668
669 /*
670  * Try to reacquire byte range locks that were released when session
671  * to server was lost.
672  */
673 static int
674 cifs_relock_file(struct cifsFileInfo *cfile)
675 {
676         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
677         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
678         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
679         int rc = 0;
680
681         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
682         if (cinode->can_cache_brlcks) {
683                 /* can cache locks - no need to relock */
684                 up_read(&cinode->lock_sem);
685                 return rc;
686         }
687
688         if (cap_unix(tcon->ses) &&
689             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
690             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
691                 rc = cifs_push_posix_locks(cfile);
692         else
693                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
694
695         up_read(&cinode->lock_sem);
696         return rc;
697 }
698
699 static int
700 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
701 {
702         int rc = -EACCES;
703         unsigned int xid;
704         __u32 oplock;
705         struct cifs_sb_info *cifs_sb;
706         struct cifs_tcon *tcon;
707         struct TCP_Server_Info *server;
708         struct cifsInodeInfo *cinode;
709         struct inode *inode;
710         void *page;
711         const char *full_path;
712         int desired_access;
713         int disposition = FILE_OPEN;
714         int create_options = CREATE_NOT_DIR;
715         struct cifs_open_parms oparms;
716
717         xid = get_xid();
718         mutex_lock(&cfile->fh_mutex);
719         if (!cfile->invalidHandle) {
720                 mutex_unlock(&cfile->fh_mutex);
721                 free_xid(xid);
722                 return 0;
723         }
724
725         inode = d_inode(cfile->dentry);
726         cifs_sb = CIFS_SB(inode->i_sb);
727         tcon = tlink_tcon(cfile->tlink);
728         server = tcon->ses->server;
729
730         /*
731          * Can not grab rename sem here because various ops, including those
732          * that already have the rename sem can end up causing writepage to get
733          * called and if the server was down that means we end up here, and we
734          * can never tell if the caller already has the rename_sem.
735          */
736         page = alloc_dentry_path();
737         full_path = build_path_from_dentry(cfile->dentry, page);
738         if (IS_ERR(full_path)) {
739                 mutex_unlock(&cfile->fh_mutex);
740                 free_dentry_path(page);
741                 free_xid(xid);
742                 return PTR_ERR(full_path);
743         }
744
745         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
746                  inode, cfile->f_flags, full_path);
747
748         if (tcon->ses->server->oplocks)
749                 oplock = REQ_OPLOCK;
750         else
751                 oplock = 0;
752
753         if (tcon->unix_ext && cap_unix(tcon->ses) &&
754             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
755                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
756                 /*
757                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
758                  * original open. Must mask them off for a reopen.
759                  */
760                 unsigned int oflags = cfile->f_flags &
761                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
762
763                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
764                                      cifs_sb->ctx->file_mode /* ignored */,
765                                      oflags, &oplock, &cfile->fid.netfid, xid);
766                 if (rc == 0) {
767                         cifs_dbg(FYI, "posix reopen succeeded\n");
768                         oparms.reconnect = true;
769                         goto reopen_success;
770                 }
771                 /*
772                  * fallthrough to retry open the old way on errors, especially
773                  * in the reconnect path it is important to retry hard
774                  */
775         }
776
777         desired_access = cifs_convert_flags(cfile->f_flags);
778
779         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
780         if (cfile->f_flags & O_SYNC)
781                 create_options |= CREATE_WRITE_THROUGH;
782
783         if (cfile->f_flags & O_DIRECT)
784                 create_options |= CREATE_NO_BUFFER;
785
786         if (server->ops->get_lease_key)
787                 server->ops->get_lease_key(inode, &cfile->fid);
788
789         oparms.tcon = tcon;
790         oparms.cifs_sb = cifs_sb;
791         oparms.desired_access = desired_access;
792         oparms.create_options = cifs_create_options(cifs_sb, create_options);
793         oparms.disposition = disposition;
794         oparms.path = full_path;
795         oparms.fid = &cfile->fid;
796         oparms.reconnect = true;
797
798         /*
799          * Can not refresh inode by passing in file_info buf to be returned by
800          * ops->open and then calling get_inode_info with returned buf since
801          * file might have write behind data that needs to be flushed and server
802          * version of file size can be stale. If we knew for sure that inode was
803          * not dirty locally we could do this.
804          */
805         rc = server->ops->open(xid, &oparms, &oplock, NULL);
806         if (rc == -ENOENT && oparms.reconnect == false) {
807                 /* durable handle timeout is expired - open the file again */
808                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
809                 /* indicate that we need to relock the file */
810                 oparms.reconnect = true;
811         }
812
813         if (rc) {
814                 mutex_unlock(&cfile->fh_mutex);
815                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
816                 cifs_dbg(FYI, "oplock: %d\n", oplock);
817                 goto reopen_error_exit;
818         }
819
820 reopen_success:
821         cfile->invalidHandle = false;
822         mutex_unlock(&cfile->fh_mutex);
823         cinode = CIFS_I(inode);
824
825         if (can_flush) {
826                 rc = filemap_write_and_wait(inode->i_mapping);
827                 if (!is_interrupt_error(rc))
828                         mapping_set_error(inode->i_mapping, rc);
829
830                 if (tcon->posix_extensions)
831                         rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
832                 else if (tcon->unix_ext)
833                         rc = cifs_get_inode_info_unix(&inode, full_path,
834                                                       inode->i_sb, xid);
835                 else
836                         rc = cifs_get_inode_info(&inode, full_path, NULL,
837                                                  inode->i_sb, xid, NULL);
838         }
839         /*
840          * Else we are writing out data to server already and could deadlock if
841          * we tried to flush data, and since we do not know if we have data that
842          * would invalidate the current end of file on the server we can not go
843          * to the server to get the new inode info.
844          */
845
846         /*
847          * If the server returned a read oplock and we have mandatory brlocks,
848          * set oplock level to None.
849          */
850         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
851                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
852                 oplock = 0;
853         }
854
855         server->ops->set_fid(cfile, &cfile->fid, oplock);
856         if (oparms.reconnect)
857                 cifs_relock_file(cfile);
858
859 reopen_error_exit:
860         free_dentry_path(page);
861         free_xid(xid);
862         return rc;
863 }
864
865 void smb2_deferred_work_close(struct work_struct *work)
866 {
867         struct cifsFileInfo *cfile = container_of(work,
868                         struct cifsFileInfo, deferred.work);
869
870         spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
871         cifs_del_deferred_close(cfile);
872         cfile->deferred_close_scheduled = false;
873         spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
874         _cifsFileInfo_put(cfile, true, false);
875 }
876
877 int cifs_close(struct inode *inode, struct file *file)
878 {
879         struct cifsFileInfo *cfile;
880         struct cifsInodeInfo *cinode = CIFS_I(inode);
881         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
882         struct cifs_deferred_close *dclose;
883
884         cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
885
886         if (file->private_data != NULL) {
887                 cfile = file->private_data;
888                 file->private_data = NULL;
889                 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
890                 if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
891                     cinode->lease_granted &&
892                     !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
893                     dclose) {
894                         if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
895                                 inode->i_ctime = inode->i_mtime = current_time(inode);
896                         }
897                         spin_lock(&cinode->deferred_lock);
898                         cifs_add_deferred_close(cfile, dclose);
899                         if (cfile->deferred_close_scheduled &&
900                             delayed_work_pending(&cfile->deferred)) {
901                                 /*
902                                  * If there is no pending work, mod_delayed_work queues new work.
903                                  * So, Increase the ref count to avoid use-after-free.
904                                  */
905                                 if (!mod_delayed_work(deferredclose_wq,
906                                                 &cfile->deferred, cifs_sb->ctx->acregmax))
907                                         cifsFileInfo_get(cfile);
908                         } else {
909                                 /* Deferred close for files */
910                                 queue_delayed_work(deferredclose_wq,
911                                                 &cfile->deferred, cifs_sb->ctx->acregmax);
912                                 cfile->deferred_close_scheduled = true;
913                                 spin_unlock(&cinode->deferred_lock);
914                                 return 0;
915                         }
916                         spin_unlock(&cinode->deferred_lock);
917                         _cifsFileInfo_put(cfile, true, false);
918                 } else {
919                         _cifsFileInfo_put(cfile, true, false);
920                         kfree(dclose);
921                 }
922         }
923
924         /* return code from the ->release op is always ignored */
925         return 0;
926 }
927
928 void
929 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
930 {
931         struct cifsFileInfo *open_file;
932         struct list_head *tmp;
933         struct list_head *tmp1;
934         struct list_head tmp_list;
935
936         if (!tcon->use_persistent || !tcon->need_reopen_files)
937                 return;
938
939         tcon->need_reopen_files = false;
940
941         cifs_dbg(FYI, "Reopen persistent handles\n");
942         INIT_LIST_HEAD(&tmp_list);
943
944         /* list all files open on tree connection, reopen resilient handles  */
945         spin_lock(&tcon->open_file_lock);
946         list_for_each(tmp, &tcon->openFileList) {
947                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
948                 if (!open_file->invalidHandle)
949                         continue;
950                 cifsFileInfo_get(open_file);
951                 list_add_tail(&open_file->rlist, &tmp_list);
952         }
953         spin_unlock(&tcon->open_file_lock);
954
955         list_for_each_safe(tmp, tmp1, &tmp_list) {
956                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
957                 if (cifs_reopen_file(open_file, false /* do not flush */))
958                         tcon->need_reopen_files = true;
959                 list_del_init(&open_file->rlist);
960                 cifsFileInfo_put(open_file);
961         }
962 }
963
964 int cifs_closedir(struct inode *inode, struct file *file)
965 {
966         int rc = 0;
967         unsigned int xid;
968         struct cifsFileInfo *cfile = file->private_data;
969         struct cifs_tcon *tcon;
970         struct TCP_Server_Info *server;
971         char *buf;
972
973         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
974
975         if (cfile == NULL)
976                 return rc;
977
978         xid = get_xid();
979         tcon = tlink_tcon(cfile->tlink);
980         server = tcon->ses->server;
981
982         cifs_dbg(FYI, "Freeing private data in close dir\n");
983         spin_lock(&cfile->file_info_lock);
984         if (server->ops->dir_needs_close(cfile)) {
985                 cfile->invalidHandle = true;
986                 spin_unlock(&cfile->file_info_lock);
987                 if (server->ops->close_dir)
988                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
989                 else
990                         rc = -ENOSYS;
991                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
992                 /* not much we can do if it fails anyway, ignore rc */
993                 rc = 0;
994         } else
995                 spin_unlock(&cfile->file_info_lock);
996
997         buf = cfile->srch_inf.ntwrk_buf_start;
998         if (buf) {
999                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1000                 cfile->srch_inf.ntwrk_buf_start = NULL;
1001                 if (cfile->srch_inf.smallBuf)
1002                         cifs_small_buf_release(buf);
1003                 else
1004                         cifs_buf_release(buf);
1005         }
1006
1007         cifs_put_tlink(cfile->tlink);
1008         kfree(file->private_data);
1009         file->private_data = NULL;
1010         /* BB can we lock the filestruct while this is going on? */
1011         free_xid(xid);
1012         return rc;
1013 }
1014
1015 static struct cifsLockInfo *
1016 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1017 {
1018         struct cifsLockInfo *lock =
1019                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1020         if (!lock)
1021                 return lock;
1022         lock->offset = offset;
1023         lock->length = length;
1024         lock->type = type;
1025         lock->pid = current->tgid;
1026         lock->flags = flags;
1027         INIT_LIST_HEAD(&lock->blist);
1028         init_waitqueue_head(&lock->block_q);
1029         return lock;
1030 }
1031
1032 void
1033 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1034 {
1035         struct cifsLockInfo *li, *tmp;
1036         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1037                 list_del_init(&li->blist);
1038                 wake_up(&li->block_q);
1039         }
1040 }
1041
1042 #define CIFS_LOCK_OP    0
1043 #define CIFS_READ_OP    1
1044 #define CIFS_WRITE_OP   2
1045
1046 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1047 static bool
1048 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1049                             __u64 length, __u8 type, __u16 flags,
1050                             struct cifsFileInfo *cfile,
1051                             struct cifsLockInfo **conf_lock, int rw_check)
1052 {
1053         struct cifsLockInfo *li;
1054         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1055         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1056
1057         list_for_each_entry(li, &fdlocks->locks, llist) {
1058                 if (offset + length <= li->offset ||
1059                     offset >= li->offset + li->length)
1060                         continue;
1061                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1062                     server->ops->compare_fids(cfile, cur_cfile)) {
1063                         /* shared lock prevents write op through the same fid */
1064                         if (!(li->type & server->vals->shared_lock_type) ||
1065                             rw_check != CIFS_WRITE_OP)
1066                                 continue;
1067                 }
1068                 if ((type & server->vals->shared_lock_type) &&
1069                     ((server->ops->compare_fids(cfile, cur_cfile) &&
1070                      current->tgid == li->pid) || type == li->type))
1071                         continue;
1072                 if (rw_check == CIFS_LOCK_OP &&
1073                     (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1074                     server->ops->compare_fids(cfile, cur_cfile))
1075                         continue;
1076                 if (conf_lock)
1077                         *conf_lock = li;
1078                 return true;
1079         }
1080         return false;
1081 }
1082
1083 bool
1084 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1085                         __u8 type, __u16 flags,
1086                         struct cifsLockInfo **conf_lock, int rw_check)
1087 {
1088         bool rc = false;
1089         struct cifs_fid_locks *cur;
1090         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1091
1092         list_for_each_entry(cur, &cinode->llist, llist) {
1093                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1094                                                  flags, cfile, conf_lock,
1095                                                  rw_check);
1096                 if (rc)
1097                         break;
1098         }
1099
1100         return rc;
1101 }
1102
1103 /*
1104  * Check if there is another lock that prevents us to set the lock (mandatory
1105  * style). If such a lock exists, update the flock structure with its
1106  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1107  * or leave it the same if we can't. Returns 0 if we don't need to request to
1108  * the server or 1 otherwise.
1109  */
1110 static int
1111 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1112                __u8 type, struct file_lock *flock)
1113 {
1114         int rc = 0;
1115         struct cifsLockInfo *conf_lock;
1116         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1117         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1118         bool exist;
1119
1120         down_read(&cinode->lock_sem);
1121
1122         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1123                                         flock->fl_flags, &conf_lock,
1124                                         CIFS_LOCK_OP);
1125         if (exist) {
1126                 flock->fl_start = conf_lock->offset;
1127                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1128                 flock->fl_pid = conf_lock->pid;
1129                 if (conf_lock->type & server->vals->shared_lock_type)
1130                         flock->fl_type = F_RDLCK;
1131                 else
1132                         flock->fl_type = F_WRLCK;
1133         } else if (!cinode->can_cache_brlcks)
1134                 rc = 1;
1135         else
1136                 flock->fl_type = F_UNLCK;
1137
1138         up_read(&cinode->lock_sem);
1139         return rc;
1140 }
1141
1142 static void
1143 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1144 {
1145         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1146         cifs_down_write(&cinode->lock_sem);
1147         list_add_tail(&lock->llist, &cfile->llist->locks);
1148         up_write(&cinode->lock_sem);
1149 }
1150
1151 /*
1152  * Set the byte-range lock (mandatory style). Returns:
1153  * 1) 0, if we set the lock and don't need to request to the server;
1154  * 2) 1, if no locks prevent us but we need to request to the server;
1155  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1156  */
1157 static int
1158 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1159                  bool wait)
1160 {
1161         struct cifsLockInfo *conf_lock;
1162         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1163         bool exist;
1164         int rc = 0;
1165
1166 try_again:
1167         exist = false;
1168         cifs_down_write(&cinode->lock_sem);
1169
1170         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1171                                         lock->type, lock->flags, &conf_lock,
1172                                         CIFS_LOCK_OP);
1173         if (!exist && cinode->can_cache_brlcks) {
1174                 list_add_tail(&lock->llist, &cfile->llist->locks);
1175                 up_write(&cinode->lock_sem);
1176                 return rc;
1177         }
1178
1179         if (!exist)
1180                 rc = 1;
1181         else if (!wait)
1182                 rc = -EACCES;
1183         else {
1184                 list_add_tail(&lock->blist, &conf_lock->blist);
1185                 up_write(&cinode->lock_sem);
1186                 rc = wait_event_interruptible(lock->block_q,
1187                                         (lock->blist.prev == &lock->blist) &&
1188                                         (lock->blist.next == &lock->blist));
1189                 if (!rc)
1190                         goto try_again;
1191                 cifs_down_write(&cinode->lock_sem);
1192                 list_del_init(&lock->blist);
1193         }
1194
1195         up_write(&cinode->lock_sem);
1196         return rc;
1197 }
1198
1199 /*
1200  * Check if there is another lock that prevents us to set the lock (posix
1201  * style). If such a lock exists, update the flock structure with its
1202  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1203  * or leave it the same if we can't. Returns 0 if we don't need to request to
1204  * the server or 1 otherwise.
1205  */
1206 static int
1207 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1208 {
1209         int rc = 0;
1210         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1211         unsigned char saved_type = flock->fl_type;
1212
1213         if ((flock->fl_flags & FL_POSIX) == 0)
1214                 return 1;
1215
1216         down_read(&cinode->lock_sem);
1217         posix_test_lock(file, flock);
1218
1219         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1220                 flock->fl_type = saved_type;
1221                 rc = 1;
1222         }
1223
1224         up_read(&cinode->lock_sem);
1225         return rc;
1226 }
1227
1228 /*
1229  * Set the byte-range lock (posix style). Returns:
1230  * 1) <0, if the error occurs while setting the lock;
1231  * 2) 0, if we set the lock and don't need to request to the server;
1232  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1233  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1234  */
1235 static int
1236 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1237 {
1238         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1239         int rc = FILE_LOCK_DEFERRED + 1;
1240
1241         if ((flock->fl_flags & FL_POSIX) == 0)
1242                 return rc;
1243
1244         cifs_down_write(&cinode->lock_sem);
1245         if (!cinode->can_cache_brlcks) {
1246                 up_write(&cinode->lock_sem);
1247                 return rc;
1248         }
1249
1250         rc = posix_lock_file(file, flock, NULL);
1251         up_write(&cinode->lock_sem);
1252         return rc;
1253 }
1254
1255 int
1256 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1257 {
1258         unsigned int xid;
1259         int rc = 0, stored_rc;
1260         struct cifsLockInfo *li, *tmp;
1261         struct cifs_tcon *tcon;
1262         unsigned int num, max_num, max_buf;
1263         LOCKING_ANDX_RANGE *buf, *cur;
1264         static const int types[] = {
1265                 LOCKING_ANDX_LARGE_FILES,
1266                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1267         };
1268         int i;
1269
1270         xid = get_xid();
1271         tcon = tlink_tcon(cfile->tlink);
1272
1273         /*
1274          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1275          * and check it before using.
1276          */
1277         max_buf = tcon->ses->server->maxBuf;
1278         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1279                 free_xid(xid);
1280                 return -EINVAL;
1281         }
1282
1283         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1284                      PAGE_SIZE);
1285         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1286                         PAGE_SIZE);
1287         max_num = (max_buf - sizeof(struct smb_hdr)) /
1288                                                 sizeof(LOCKING_ANDX_RANGE);
1289         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1290         if (!buf) {
1291                 free_xid(xid);
1292                 return -ENOMEM;
1293         }
1294
1295         for (i = 0; i < 2; i++) {
1296                 cur = buf;
1297                 num = 0;
1298                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1299                         if (li->type != types[i])
1300                                 continue;
1301                         cur->Pid = cpu_to_le16(li->pid);
1302                         cur->LengthLow = cpu_to_le32((u32)li->length);
1303                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1304                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1305                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1306                         if (++num == max_num) {
1307                                 stored_rc = cifs_lockv(xid, tcon,
1308                                                        cfile->fid.netfid,
1309                                                        (__u8)li->type, 0, num,
1310                                                        buf);
1311                                 if (stored_rc)
1312                                         rc = stored_rc;
1313                                 cur = buf;
1314                                 num = 0;
1315                         } else
1316                                 cur++;
1317                 }
1318
1319                 if (num) {
1320                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1321                                                (__u8)types[i], 0, num, buf);
1322                         if (stored_rc)
1323                                 rc = stored_rc;
1324                 }
1325         }
1326
1327         kfree(buf);
1328         free_xid(xid);
1329         return rc;
1330 }
1331
1332 static __u32
1333 hash_lockowner(fl_owner_t owner)
1334 {
1335         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1336 }
1337
1338 struct lock_to_push {
1339         struct list_head llist;
1340         __u64 offset;
1341         __u64 length;
1342         __u32 pid;
1343         __u16 netfid;
1344         __u8 type;
1345 };
1346
1347 static int
1348 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1349 {
1350         struct inode *inode = d_inode(cfile->dentry);
1351         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1352         struct file_lock *flock;
1353         struct file_lock_context *flctx = inode->i_flctx;
1354         unsigned int count = 0, i;
1355         int rc = 0, xid, type;
1356         struct list_head locks_to_send, *el;
1357         struct lock_to_push *lck, *tmp;
1358         __u64 length;
1359
1360         xid = get_xid();
1361
1362         if (!flctx)
1363                 goto out;
1364
1365         spin_lock(&flctx->flc_lock);
1366         list_for_each(el, &flctx->flc_posix) {
1367                 count++;
1368         }
1369         spin_unlock(&flctx->flc_lock);
1370
1371         INIT_LIST_HEAD(&locks_to_send);
1372
1373         /*
1374          * Allocating count locks is enough because no FL_POSIX locks can be
1375          * added to the list while we are holding cinode->lock_sem that
1376          * protects locking operations of this inode.
1377          */
1378         for (i = 0; i < count; i++) {
1379                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1380                 if (!lck) {
1381                         rc = -ENOMEM;
1382                         goto err_out;
1383                 }
1384                 list_add_tail(&lck->llist, &locks_to_send);
1385         }
1386
1387         el = locks_to_send.next;
1388         spin_lock(&flctx->flc_lock);
1389         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1390                 if (el == &locks_to_send) {
1391                         /*
1392                          * The list ended. We don't have enough allocated
1393                          * structures - something is really wrong.
1394                          */
1395                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1396                         break;
1397                 }
1398                 length = cifs_flock_len(flock);
1399                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1400                         type = CIFS_RDLCK;
1401                 else
1402                         type = CIFS_WRLCK;
1403                 lck = list_entry(el, struct lock_to_push, llist);
1404                 lck->pid = hash_lockowner(flock->fl_owner);
1405                 lck->netfid = cfile->fid.netfid;
1406                 lck->length = length;
1407                 lck->type = type;
1408                 lck->offset = flock->fl_start;
1409         }
1410         spin_unlock(&flctx->flc_lock);
1411
1412         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1413                 int stored_rc;
1414
1415                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1416                                              lck->offset, lck->length, NULL,
1417                                              lck->type, 0);
1418                 if (stored_rc)
1419                         rc = stored_rc;
1420                 list_del(&lck->llist);
1421                 kfree(lck);
1422         }
1423
1424 out:
1425         free_xid(xid);
1426         return rc;
1427 err_out:
1428         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1429                 list_del(&lck->llist);
1430                 kfree(lck);
1431         }
1432         goto out;
1433 }
1434
1435 static int
1436 cifs_push_locks(struct cifsFileInfo *cfile)
1437 {
1438         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1439         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1440         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1441         int rc = 0;
1442
1443         /* we are going to update can_cache_brlcks here - need a write access */
1444         cifs_down_write(&cinode->lock_sem);
1445         if (!cinode->can_cache_brlcks) {
1446                 up_write(&cinode->lock_sem);
1447                 return rc;
1448         }
1449
1450         if (cap_unix(tcon->ses) &&
1451             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1452             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1453                 rc = cifs_push_posix_locks(cfile);
1454         else
1455                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1456
1457         cinode->can_cache_brlcks = false;
1458         up_write(&cinode->lock_sem);
1459         return rc;
1460 }
1461
1462 static void
1463 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1464                 bool *wait_flag, struct TCP_Server_Info *server)
1465 {
1466         if (flock->fl_flags & FL_POSIX)
1467                 cifs_dbg(FYI, "Posix\n");
1468         if (flock->fl_flags & FL_FLOCK)
1469                 cifs_dbg(FYI, "Flock\n");
1470         if (flock->fl_flags & FL_SLEEP) {
1471                 cifs_dbg(FYI, "Blocking lock\n");
1472                 *wait_flag = true;
1473         }
1474         if (flock->fl_flags & FL_ACCESS)
1475                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1476         if (flock->fl_flags & FL_LEASE)
1477                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1478         if (flock->fl_flags &
1479             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1480                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1481                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1482
1483         *type = server->vals->large_lock_type;
1484         if (flock->fl_type == F_WRLCK) {
1485                 cifs_dbg(FYI, "F_WRLCK\n");
1486                 *type |= server->vals->exclusive_lock_type;
1487                 *lock = 1;
1488         } else if (flock->fl_type == F_UNLCK) {
1489                 cifs_dbg(FYI, "F_UNLCK\n");
1490                 *type |= server->vals->unlock_lock_type;
1491                 *unlock = 1;
1492                 /* Check if unlock includes more than one lock range */
1493         } else if (flock->fl_type == F_RDLCK) {
1494                 cifs_dbg(FYI, "F_RDLCK\n");
1495                 *type |= server->vals->shared_lock_type;
1496                 *lock = 1;
1497         } else if (flock->fl_type == F_EXLCK) {
1498                 cifs_dbg(FYI, "F_EXLCK\n");
1499                 *type |= server->vals->exclusive_lock_type;
1500                 *lock = 1;
1501         } else if (flock->fl_type == F_SHLCK) {
1502                 cifs_dbg(FYI, "F_SHLCK\n");
1503                 *type |= server->vals->shared_lock_type;
1504                 *lock = 1;
1505         } else
1506                 cifs_dbg(FYI, "Unknown type of lock\n");
1507 }
1508
1509 static int
1510 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1511            bool wait_flag, bool posix_lck, unsigned int xid)
1512 {
1513         int rc = 0;
1514         __u64 length = cifs_flock_len(flock);
1515         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1516         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1517         struct TCP_Server_Info *server = tcon->ses->server;
1518         __u16 netfid = cfile->fid.netfid;
1519
1520         if (posix_lck) {
1521                 int posix_lock_type;
1522
1523                 rc = cifs_posix_lock_test(file, flock);
1524                 if (!rc)
1525                         return rc;
1526
1527                 if (type & server->vals->shared_lock_type)
1528                         posix_lock_type = CIFS_RDLCK;
1529                 else
1530                         posix_lock_type = CIFS_WRLCK;
1531                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1532                                       hash_lockowner(flock->fl_owner),
1533                                       flock->fl_start, length, flock,
1534                                       posix_lock_type, wait_flag);
1535                 return rc;
1536         }
1537
1538         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1539         if (!rc)
1540                 return rc;
1541
1542         /* BB we could chain these into one lock request BB */
1543         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1544                                     1, 0, false);
1545         if (rc == 0) {
1546                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1547                                             type, 0, 1, false);
1548                 flock->fl_type = F_UNLCK;
1549                 if (rc != 0)
1550                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1551                                  rc);
1552                 return 0;
1553         }
1554
1555         if (type & server->vals->shared_lock_type) {
1556                 flock->fl_type = F_WRLCK;
1557                 return 0;
1558         }
1559
1560         type &= ~server->vals->exclusive_lock_type;
1561
1562         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1563                                     type | server->vals->shared_lock_type,
1564                                     1, 0, false);
1565         if (rc == 0) {
1566                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1567                         type | server->vals->shared_lock_type, 0, 1, false);
1568                 flock->fl_type = F_RDLCK;
1569                 if (rc != 0)
1570                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1571                                  rc);
1572         } else
1573                 flock->fl_type = F_WRLCK;
1574
1575         return 0;
1576 }
1577
1578 void
1579 cifs_move_llist(struct list_head *source, struct list_head *dest)
1580 {
1581         struct list_head *li, *tmp;
1582         list_for_each_safe(li, tmp, source)
1583                 list_move(li, dest);
1584 }
1585
1586 void
1587 cifs_free_llist(struct list_head *llist)
1588 {
1589         struct cifsLockInfo *li, *tmp;
1590         list_for_each_entry_safe(li, tmp, llist, llist) {
1591                 cifs_del_lock_waiters(li);
1592                 list_del(&li->llist);
1593                 kfree(li);
1594         }
1595 }
1596
/*
 * Release the byte-range locks recorded on @cfile that belong to the
 * current thread group and lie entirely inside the range described by
 * @flock.
 *
 * The file's lock list is scanned once per entry of types[].  When brlocks
 * can be cached (cinode->can_cache_brlcks) a matching lock is simply
 * dropped from the list; otherwise matching locks are batched into a
 * LOCKING_ANDX_RANGE array and handed to cifs_lockv(), at most max_num
 * ranges per request.
 *
 * Returns 0 on success, -EINVAL if the server's maxBuf is too small to
 * hold a single range, -ENOMEM if the range buffer cannot be allocated,
 * or the error of the most recent failing cifs_lockv() call.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        /* lock types handled, one scan of the lock list per entry */
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = cifs_flock_len(flock);
        /* locks already queued in buf, kept so they can be restored on error */
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
                return -EINVAL;

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        /*
         * NOTE(review): sizeof(struct smb_hdr) is subtracted here and again
         * when computing max_num below; the check above only guarantees room
         * for one header plus one range, so confirm the second subtraction
         * cannot wrap for very small maxBuf values.
         */
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        cifs_down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                /* start a fresh batch for this lock type */
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        /* skip locks not fully contained in the unlock range */
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        /* skip locks recorded for a different thread group */
                        if (current->tgid != li->pid)
                                continue;
                        /* only the type handled by this pass */
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        /* encode the lock as a little-endian wire range */
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        /* buffer full: send this batch now and start over */
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeeded -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                /* send whatever is left over in a partially filled batch */
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist,
                                                &cfile->llist->locks);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        up_write(&cinode->lock_sem);
        kfree(buf);
        return rc;
}
1709
1710 static int
1711 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1712            bool wait_flag, bool posix_lck, int lock, int unlock,
1713            unsigned int xid)
1714 {
1715         int rc = 0;
1716         __u64 length = cifs_flock_len(flock);
1717         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1718         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1719         struct TCP_Server_Info *server = tcon->ses->server;
1720         struct inode *inode = d_inode(cfile->dentry);
1721
1722         if (posix_lck) {
1723                 int posix_lock_type;
1724
1725                 rc = cifs_posix_lock_set(file, flock);
1726                 if (rc <= FILE_LOCK_DEFERRED)
1727                         return rc;
1728
1729                 if (type & server->vals->shared_lock_type)
1730                         posix_lock_type = CIFS_RDLCK;
1731                 else
1732                         posix_lock_type = CIFS_WRLCK;
1733
1734                 if (unlock == 1)
1735                         posix_lock_type = CIFS_UNLCK;
1736
1737                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1738                                       hash_lockowner(flock->fl_owner),
1739                                       flock->fl_start, length,
1740                                       NULL, posix_lock_type, wait_flag);
1741                 goto out;
1742         }
1743
1744         if (lock) {
1745                 struct cifsLockInfo *lock;
1746
1747                 lock = cifs_lock_init(flock->fl_start, length, type,
1748                                       flock->fl_flags);
1749                 if (!lock)
1750                         return -ENOMEM;
1751
1752                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1753                 if (rc < 0) {
1754                         kfree(lock);
1755                         return rc;
1756                 }
1757                 if (!rc)
1758                         goto out;
1759
1760                 /*
1761                  * Windows 7 server can delay breaking lease from read to None
1762                  * if we set a byte-range lock on a file - break it explicitly
1763                  * before sending the lock to the server to be sure the next
1764                  * read won't conflict with non-overlapted locks due to
1765                  * pagereading.
1766                  */
1767                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1768                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1769                         cifs_zap_mapping(inode);
1770                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1771                                  inode);
1772                         CIFS_I(inode)->oplock = 0;
1773                 }
1774
1775                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1776                                             type, 1, 0, wait_flag);
1777                 if (rc) {
1778                         kfree(lock);
1779                         return rc;
1780                 }
1781
1782                 cifs_lock_add(cfile, lock);
1783         } else if (unlock)
1784                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1785
1786 out:
1787         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1788                 /*
1789                  * If this is a request to remove all locks because we
1790                  * are closing the file, it doesn't matter if the
1791                  * unlocking failed as both cifs.ko and the SMB server
1792                  * remove the lock on file close
1793                  */
1794                 if (rc) {
1795                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1796                         if (!(flock->fl_flags & FL_CLOSE))
1797                                 return rc;
1798                 }
1799                 rc = locks_lock_file_wait(file, flock);
1800         }
1801         return rc;
1802 }
1803
1804 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1805 {
1806         int rc, xid;
1807         int lock = 0, unlock = 0;
1808         bool wait_flag = false;
1809         bool posix_lck = false;
1810         struct cifs_sb_info *cifs_sb;
1811         struct cifs_tcon *tcon;
1812         struct cifsFileInfo *cfile;
1813         __u32 type;
1814
1815         rc = -EACCES;
1816         xid = get_xid();
1817
1818         if (!(fl->fl_flags & FL_FLOCK))
1819                 return -ENOLCK;
1820
1821         cfile = (struct cifsFileInfo *)file->private_data;
1822         tcon = tlink_tcon(cfile->tlink);
1823
1824         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1825                         tcon->ses->server);
1826         cifs_sb = CIFS_FILE_SB(file);
1827
1828         if (cap_unix(tcon->ses) &&
1829             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1830             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1831                 posix_lck = true;
1832
1833         if (!lock && !unlock) {
1834                 /*
1835                  * if no lock or unlock then nothing to do since we do not
1836                  * know what it is
1837                  */
1838                 free_xid(xid);
1839                 return -EOPNOTSUPP;
1840         }
1841
1842         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1843                         xid);
1844         free_xid(xid);
1845         return rc;
1846
1847
1848 }
1849
1850 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1851 {
1852         int rc, xid;
1853         int lock = 0, unlock = 0;
1854         bool wait_flag = false;
1855         bool posix_lck = false;
1856         struct cifs_sb_info *cifs_sb;
1857         struct cifs_tcon *tcon;
1858         struct cifsFileInfo *cfile;
1859         __u32 type;
1860
1861         rc = -EACCES;
1862         xid = get_xid();
1863
1864         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1865                  cmd, flock->fl_flags, flock->fl_type,
1866                  flock->fl_start, flock->fl_end);
1867
1868         cfile = (struct cifsFileInfo *)file->private_data;
1869         tcon = tlink_tcon(cfile->tlink);
1870
1871         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1872                         tcon->ses->server);
1873         cifs_sb = CIFS_FILE_SB(file);
1874         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1875
1876         if (cap_unix(tcon->ses) &&
1877             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1878             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1879                 posix_lck = true;
1880         /*
1881          * BB add code here to normalize offset and length to account for
1882          * negative length which we can not accept over the wire.
1883          */
1884         if (IS_GETLK(cmd)) {
1885                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1886                 free_xid(xid);
1887                 return rc;
1888         }
1889
1890         if (!lock && !unlock) {
1891                 /*
1892                  * if no lock or unlock then nothing to do since we do not
1893                  * know what it is
1894                  */
1895                 free_xid(xid);
1896                 return -EOPNOTSUPP;
1897         }
1898
1899         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1900                         xid);
1901         free_xid(xid);
1902         return rc;
1903 }
1904
1905 /*
1906  * update the file size (if needed) after a write. Should be called with
1907  * the inode->i_lock held
1908  */
1909 void
1910 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1911                       unsigned int bytes_written)
1912 {
1913         loff_t end_of_write = offset + bytes_written;
1914
1915         if (end_of_write > cifsi->server_eof)
1916                 cifsi->server_eof = end_of_write;
1917 }
1918
1919 static ssize_t
1920 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1921            size_t write_size, loff_t *offset)
1922 {
1923         int rc = 0;
1924         unsigned int bytes_written = 0;
1925         unsigned int total_written;
1926         struct cifs_tcon *tcon;
1927         struct TCP_Server_Info *server;
1928         unsigned int xid;
1929         struct dentry *dentry = open_file->dentry;
1930         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1931         struct cifs_io_parms io_parms = {0};
1932
1933         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1934                  write_size, *offset, dentry);
1935
1936         tcon = tlink_tcon(open_file->tlink);
1937         server = tcon->ses->server;
1938
1939         if (!server->ops->sync_write)
1940                 return -ENOSYS;
1941
1942         xid = get_xid();
1943
1944         for (total_written = 0; write_size > total_written;
1945              total_written += bytes_written) {
1946                 rc = -EAGAIN;
1947                 while (rc == -EAGAIN) {
1948                         struct kvec iov[2];
1949                         unsigned int len;
1950
1951                         if (open_file->invalidHandle) {
1952                                 /* we could deadlock if we called
1953                                    filemap_fdatawait from here so tell
1954                                    reopen_file not to flush data to
1955                                    server now */
1956                                 rc = cifs_reopen_file(open_file, false);
1957                                 if (rc != 0)
1958                                         break;
1959                         }
1960
1961                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1962                                   (unsigned int)write_size - total_written);
1963                         /* iov[0] is reserved for smb header */
1964                         iov[1].iov_base = (char *)write_data + total_written;
1965                         iov[1].iov_len = len;
1966                         io_parms.pid = pid;
1967                         io_parms.tcon = tcon;
1968                         io_parms.offset = *offset;
1969                         io_parms.length = len;
1970                         rc = server->ops->sync_write(xid, &open_file->fid,
1971                                         &io_parms, &bytes_written, iov, 1);
1972                 }
1973                 if (rc || (bytes_written == 0)) {
1974                         if (total_written)
1975                                 break;
1976                         else {
1977                                 free_xid(xid);
1978                                 return rc;
1979                         }
1980                 } else {
1981                         spin_lock(&d_inode(dentry)->i_lock);
1982                         cifs_update_eof(cifsi, *offset, bytes_written);
1983                         spin_unlock(&d_inode(dentry)->i_lock);
1984                         *offset += bytes_written;
1985                 }
1986         }
1987
1988         cifs_stats_bytes_written(tcon, total_written);
1989
1990         if (total_written > 0) {
1991                 spin_lock(&d_inode(dentry)->i_lock);
1992                 if (*offset > d_inode(dentry)->i_size) {
1993                         i_size_write(d_inode(dentry), *offset);
1994                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1995                 }
1996                 spin_unlock(&d_inode(dentry)->i_lock);
1997         }
1998         mark_inode_dirty_sync(d_inode(dentry));
1999         free_xid(xid);
2000         return total_written;
2001 }
2002
2003 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2004                                         bool fsuid_only)
2005 {
2006         struct cifsFileInfo *open_file = NULL;
2007         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2008
2009         /* only filter by fsuid on multiuser mounts */
2010         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2011                 fsuid_only = false;
2012
2013         spin_lock(&cifs_inode->open_file_lock);
2014         /* we could simply get the first_list_entry since write-only entries
2015            are always at the end of the list but since the first entry might
2016            have a close pending, we go through the whole list */
2017         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2018                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2019                         continue;
2020                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2021                         if ((!open_file->invalidHandle)) {
2022                                 /* found a good file */
2023                                 /* lock it so it will not be closed on us */
2024                                 cifsFileInfo_get(open_file);
2025                                 spin_unlock(&cifs_inode->open_file_lock);
2026                                 return open_file;
2027                         } /* else might as well continue, and look for
2028                              another, or simply have the caller reopen it
2029                              again rather than trying to fix this handle */
2030                 } else /* write only file */
2031                         break; /* write only files are last so must be done */
2032         }
2033         spin_unlock(&cifs_inode->open_file_lock);
2034         return NULL;
2035 }
2036
/*
 * Find an open handle on @cifs_inode that can be used for writing and
 * return it in *@ret_file with a reference taken via cifsFileInfo_get().
 *
 * @flags: FIND_WR_FSUID_ONLY limits the search to handles owned by the
 *         current fsuid (only honoured on multiuser mounts);
 *         FIND_WR_WITH_DELETE requires the handle to have DELETE access.
 *
 * Handles opened by the current thread group are preferred; if none is
 * usable the list is scanned again accepting any opener.  If only an
 * invalidated handle was seen, it is reopened; when the reopen fails the
 * handle is moved to the tail of the list and the search is repeated,
 * until more than MAX_REOPEN_ATT reopen attempts have failed.
 *
 * Return -EBADF if no handle is found and general rc otherwise
 * (0 on success, with *@ret_file set; *@ret_file is NULL on failure).
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
                       struct cifsFileInfo **ret_file)
{
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc = -EBADF;
        unsigned int refind = 0;
        bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
        bool with_delete = flags & FIND_WR_WITH_DELETE;
        *ret_file = NULL;

        /*
         * Having a null inode here (because mapping->host was set to zero by
         * the VFS or MM) should not happen but we had reports of an oops (due
         * to it being zero) during stress testcases so we need to check for it
         */

        if (cifs_inode == NULL) {
                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
                dump_stack();
                return rc;
        }

        cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        /* open_file_lock is held whenever control reaches refind_writable */
        spin_lock(&cifs_inode->open_file_lock);
refind_writable:
        /* give up once too many reopen attempts have failed */
        if (refind > MAX_REOPEN_ATT) {
                spin_unlock(&cifs_inode->open_file_lock);
                return rc;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                /* first pass: only handles opened by our thread group */
                if (!any_available && open_file->pid != current->tgid)
                        continue;
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (with_delete && !(open_file->fid.access & DELETE))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get(open_file);
                                spin_unlock(&cifs_inode->open_file_lock);
                                *ret_file = open_file;
                                return 0;
                        } else {
                                /* remember the first invalid handle seen */
                                if (!inv_file)
                                        inv_file = open_file;
                        }
                }
        }
        /* couldn't find useable FH with same pid, try any available */
        if (!any_available) {
                any_available = true;
                goto refind_writable;
        }

        if (inv_file) {
                /* restart from the same-tgid pass if the reopen below fails */
                any_available = false;
                /* pin the handle before the lock is dropped */
                cifsFileInfo_get(inv_file);
        }

        spin_unlock(&cifs_inode->open_file_lock);

        if (inv_file) {
                rc = cifs_reopen_file(inv_file, false);
                if (!rc) {
                        *ret_file = inv_file;
                        return 0;
                }

                /*
                 * Reopen failed: push the handle to the tail of the list,
                 * drop our reference and search again.
                 */
                spin_lock(&cifs_inode->open_file_lock);
                list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
                spin_unlock(&cifs_inode->open_file_lock);
                cifsFileInfo_put(inv_file);
                ++refind;
                inv_file = NULL;
                spin_lock(&cifs_inode->open_file_lock);
                goto refind_writable;
        }

        return rc;
}
2127
2128 struct cifsFileInfo *
2129 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2130 {
2131         struct cifsFileInfo *cfile;
2132         int rc;
2133
2134         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2135         if (rc)
2136                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2137
2138         return cfile;
2139 }
2140
2141 int
2142 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2143                        int flags,
2144                        struct cifsFileInfo **ret_file)
2145 {
2146         struct cifsFileInfo *cfile;
2147         void *page = alloc_dentry_path();
2148
2149         *ret_file = NULL;
2150
2151         spin_lock(&tcon->open_file_lock);
2152         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2153                 struct cifsInodeInfo *cinode;
2154                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2155                 if (IS_ERR(full_path)) {
2156                         spin_unlock(&tcon->open_file_lock);
2157                         free_dentry_path(page);
2158                         return PTR_ERR(full_path);
2159                 }
2160                 if (strcmp(full_path, name))
2161                         continue;
2162
2163                 cinode = CIFS_I(d_inode(cfile->dentry));
2164                 spin_unlock(&tcon->open_file_lock);
2165                 free_dentry_path(page);
2166                 return cifs_get_writable_file(cinode, flags, ret_file);
2167         }
2168
2169         spin_unlock(&tcon->open_file_lock);
2170         free_dentry_path(page);
2171         return -ENOENT;
2172 }
2173
2174 int
2175 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2176                        struct cifsFileInfo **ret_file)
2177 {
2178         struct cifsFileInfo *cfile;
2179         void *page = alloc_dentry_path();
2180
2181         *ret_file = NULL;
2182
2183         spin_lock(&tcon->open_file_lock);
2184         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2185                 struct cifsInodeInfo *cinode;
2186                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2187                 if (IS_ERR(full_path)) {
2188                         spin_unlock(&tcon->open_file_lock);
2189                         free_dentry_path(page);
2190                         return PTR_ERR(full_path);
2191                 }
2192                 if (strcmp(full_path, name))
2193                         continue;
2194
2195                 cinode = CIFS_I(d_inode(cfile->dentry));
2196                 spin_unlock(&tcon->open_file_lock);
2197                 free_dentry_path(page);
2198                 *ret_file = find_readable_file(cinode, 0);
2199                 return *ret_file ? 0 : -ENOENT;
2200         }
2201
2202         spin_unlock(&tcon->open_file_lock);
2203         free_dentry_path(page);
2204         return -ENOENT;
2205 }
2206
2207 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2208 {
2209         struct address_space *mapping = page->mapping;
2210         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2211         char *write_data;
2212         int rc = -EFAULT;
2213         int bytes_written = 0;
2214         struct inode *inode;
2215         struct cifsFileInfo *open_file;
2216
2217         if (!mapping || !mapping->host)
2218                 return -EFAULT;
2219
2220         inode = page->mapping->host;
2221
2222         offset += (loff_t)from;
2223         write_data = kmap(page);
2224         write_data += from;
2225
2226         if ((to > PAGE_SIZE) || (from > to)) {
2227                 kunmap(page);
2228                 return -EIO;
2229         }
2230
2231         /* racing with truncate? */
2232         if (offset > mapping->host->i_size) {
2233                 kunmap(page);
2234                 return 0; /* don't care */
2235         }
2236
2237         /* check to make sure that we are not extending the file */
2238         if (mapping->host->i_size - offset < (loff_t)to)
2239                 to = (unsigned)(mapping->host->i_size - offset);
2240
2241         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2242                                     &open_file);
2243         if (!rc) {
2244                 bytes_written = cifs_write(open_file, open_file->pid,
2245                                            write_data, to - from, &offset);
2246                 cifsFileInfo_put(open_file);
2247                 /* Does mm or vfs already set times? */
2248                 inode->i_atime = inode->i_mtime = current_time(inode);
2249                 if ((bytes_written > 0) && (offset))
2250                         rc = 0;
2251                 else if (bytes_written < 0)
2252                         rc = bytes_written;
2253                 else
2254                         rc = -EFAULT;
2255         } else {
2256                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2257                 if (!is_retryable_error(rc))
2258                         rc = -EIO;
2259         }
2260
2261         kunmap(page);
2262         return rc;
2263 }
2264
2265 static struct cifs_writedata *
2266 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2267                           pgoff_t end, pgoff_t *index,
2268                           unsigned int *found_pages)
2269 {
2270         struct cifs_writedata *wdata;
2271
2272         wdata = cifs_writedata_alloc((unsigned int)tofind,
2273                                      cifs_writev_complete);
2274         if (!wdata)
2275                 return NULL;
2276
2277         *found_pages = find_get_pages_range_tag(mapping, index, end,
2278                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2279         return wdata;
2280 }
2281
/*
 * Lock and mark for writeback the leading run of usable pages in
 * wdata->pages[0..found_pages).
 *
 * Pages are taken in order until one cannot be used: it fails to lock
 * (only the first page is waited for), no longer belongs to @mapping,
 * lies beyond @end for a non-cyclic writeback, is not consecutive with
 * the previous page, is still under writeback, is no longer dirty, or
 * starts at or beyond the inode size.  *@done is set when the range end
 * or EOF is reached.  *@next is advanced past each accepted page.
 *
 * References on the pages that were not accepted are dropped and their
 * array slots cleared.  If no page was accepted, *@index is moved past
 * the first page so that the caller can look for pages again.
 *
 * Returns the number of pages accepted.
 * NOTE(review): wdata->pages[0] is read when nothing was accepted, so
 * this presumably requires found_pages > 0 - confirm against the caller.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
                    struct address_space *mapping,
                    struct writeback_control *wbc,
                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
        unsigned int nr_pages = 0, i;
        struct page *page;

        for (i = 0; i < found_pages; i++) {
                page = wdata->pages[i];
                /*
                 * At this point we hold neither the i_pages lock nor the
                 * page lock: the page may be truncated or invalidated
                 * (changing page->mapping to NULL), or even swizzled
                 * back from swapper_space to tmpfs file mapping
                 */

                /* block only for the first page; never wait for later ones */
                if (nr_pages == 0)
                        lock_page(page);
                else if (!trylock_page(page))
                        break;

                /* page was truncated or moved while we were not looking */
                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
                        break;
                }

                /* past the requested range: nothing more to do */
                if (!wbc->range_cyclic && page->index > end) {
                        *done = true;
                        unlock_page(page);
                        break;
                }

                if (*next && (page->index != *next)) {
                        /* Not next consecutive page */
                        unlock_page(page);
                        break;
                }

                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                /* still being written back, or no longer dirty */
                if (PageWriteback(page) ||
                                !clear_page_dirty_for_io(page)) {
                        unlock_page(page);
                        break;
                }

                /*
                 * This actually clears the dirty bit in the radix tree.
                 * See cifs_writepage() for more commentary.
                 */
                set_page_writeback(page);
                /* page starts at or beyond EOF: undo writeback and stop */
                if (page_offset(page) >= i_size_read(mapping->host)) {
                        *done = true;
                        unlock_page(page);
                        end_page_writeback(page);
                        break;
                }

                wdata->pages[i] = page;
                *next = page->index + 1;
                ++nr_pages;
        }

        /* reset index to refind any pages skipped */
        if (nr_pages == 0)
                *index = wdata->pages[0]->index + 1;

        /* put any pages we aren't going to use */
        for (i = nr_pages; i < found_pages; i++) {
                put_page(wdata->pages[i]);
                wdata->pages[i] = NULL;
        }

        return nr_pages;
}
2360
2361 static int
2362 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2363                  struct address_space *mapping, struct writeback_control *wbc)
2364 {
2365         int rc;
2366
2367         wdata->sync_mode = wbc->sync_mode;
2368         wdata->nr_pages = nr_pages;
2369         wdata->offset = page_offset(wdata->pages[0]);
2370         wdata->pagesz = PAGE_SIZE;
2371         wdata->tailsz = min(i_size_read(mapping->host) -
2372                         page_offset(wdata->pages[nr_pages - 1]),
2373                         (loff_t)PAGE_SIZE);
2374         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2375         wdata->pid = wdata->cfile->pid;
2376
2377         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2378         if (rc)
2379                 return rc;
2380
2381         if (wdata->cfile->invalidHandle)
2382                 rc = -EAGAIN;
2383         else
2384                 rc = wdata->server->ops->async_writev(wdata,
2385                                                       cifs_writedata_release);
2386
2387         return rc;
2388 }
2389
/*
 * Write back dirty pages of @mapping in batches of consecutive pages.
 *
 * When the mount's wsize is smaller than a page the work is delegated to
 * generic_writepages().  Otherwise every pass of the main loop looks up a
 * writable handle, waits for credits, collects and prepares pages via
 * wdata_alloc_and_fillpages()/wdata_prepare_pages() and submits them with
 * wdata_send_pages().
 *
 * @mapping: address space to write back
 * @wbc:     writeback control (range, sync mode, nr_to_write budget)
 *
 * Returns 0 or a negative errno.  The first batch error recorded in
 * saved_rc takes precedence over the rc of the last operation.
 */
2390 static int cifs_writepages(struct address_space *mapping,
2391                            struct writeback_control *wbc)
2392 {
2393         struct inode *inode = mapping->host;
2394         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2395         struct TCP_Server_Info *server;
2396         bool done = false, scanned = false, range_whole = false;
2397         pgoff_t end, index;
2398         struct cifs_writedata *wdata;
2399         struct cifsFileInfo *cfile = NULL;
2400         int rc = 0;
2401         int saved_rc = 0;
2402         unsigned int xid;
2403
2404         /*
2405          * If wsize is smaller than the page cache size, default to writing
2406          * one page at a time via cifs_writepage
2407          */
2408         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2409                 return generic_writepages(mapping, wbc);
2410
2411         xid = get_xid();
2412         if (wbc->range_cyclic) {
2413                 index = mapping->writeback_index; /* Start from prev offset */
2414                 end = -1;
2415         } else {
2416                 index = wbc->range_start >> PAGE_SHIFT;
2417                 end = wbc->range_end >> PAGE_SHIFT;
2418                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2419                         range_whole = true;
                     /* explicit range: marking it scanned disables the wrap-around pass */
2420                 scanned = true;
2421         }
2422         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2423
2424 retry:
2425         while (!done && index <= end) {
2426                 unsigned int i, nr_pages, found_pages, wsize;
2427                 pgoff_t next = 0, tofind, saved_index = index;
2428                 struct cifs_credits credits_on_stack;
2429                 struct cifs_credits *credits = &credits_on_stack;
2430                 int get_file_rc = 0;
2431
                     /* drop a handle left over from a pass that did not hand it to a wdata */
2432                 if (cfile)
2433                         cifsFileInfo_put(cfile);
2434
2435                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2436
2437                 /* in case of an error store it to return later */
2438                 if (rc)
2439                         get_file_rc = rc;
2440
2441                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2442                                                    &wsize, credits);
2443                 if (rc != 0) {
2444                         done = true;
2445                         break;
2446                 }
2447
                     /* pages to look for: bounded by the granted wsize and by the remaining range */
2448                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2449
2450                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2451                                                   &found_pages);
2452                 if (!wdata) {
2453                         rc = -ENOMEM;
2454                         done = true;
2455                         add_credits_and_wake_if(server, credits, 0);
2456                         break;
2457                 }
2458
                     /*
                      * Nothing found from index onwards: leave the loop without
                      * setting done, so a cyclic scan may still wrap to index 0.
                      */
2459                 if (found_pages == 0) {
2460                         kref_put(&wdata->refcount, cifs_writedata_release);
2461                         add_credits_and_wake_if(server, credits, 0);
2462                         break;
2463                 }
2464
2465                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2466                                                end, &index, &next, &done);
2467
2468                 /* nothing to write? */
2469                 if (nr_pages == 0) {
2470                         kref_put(&wdata->refcount, cifs_writedata_release);
2471                         add_credits_and_wake_if(server, credits, 0);
2472                         continue;
2473                 }
2474
2475                 wdata->credits = credits_on_stack;
2476                 wdata->cfile = cfile;
2477                 wdata->server = server;
                     /*
                      * The handle reference (if any) now travels with wdata;
                      * presumably it is dropped by cifs_writedata_release().
                      */
2478                 cfile = NULL;
2479
2480                 if (!wdata->cfile) {
2481                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2482                                  get_file_rc);
2483                         if (is_retryable_error(get_file_rc))
2484                                 rc = get_file_rc;
2485                         else
2486                                 rc = -EBADF;
2487                 } else
2488                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2489
                     /* the pages are unlocked whether or not the send succeeded */
2490                 for (i = 0; i < nr_pages; ++i)
2491                         unlock_page(wdata->pages[i]);
2492
2493                 /* send failure -- clean up the mess */
2494                 if (rc != 0) {
2495                         add_credits_and_wake_if(server, &wdata->credits, 0);
2496                         for (i = 0; i < nr_pages; ++i) {
2497                                 if (is_retryable_error(rc))
2498                                         redirty_page_for_writepage(wbc,
2499                                                            wdata->pages[i]);
2500                                 else
2501                                         SetPageError(wdata->pages[i]);
2502                                 end_page_writeback(wdata->pages[i]);
2503                                 put_page(wdata->pages[i]);
2504                         }
2505                         if (!is_retryable_error(rc))
2506                                 mapping_set_error(mapping, rc);
2507                 }
                     /* drop this function's reference on wdata */
2508                 kref_put(&wdata->refcount, cifs_writedata_release);
2509
                     /* WB_SYNC_ALL writeback retries the same range on -EAGAIN */
2510                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2511                         index = saved_index;
2512                         continue;
2513                 }
2514
2515                 /* Return immediately if we received a signal during writing */
2516                 if (is_interrupt_error(rc)) {
2517                         done = true;
2518                         break;
2519                 }
2520
                     /* remember only the first failure; later batches are still attempted */
2521                 if (rc != 0 && saved_rc == 0)
2522                         saved_rc = rc;
2523
2524                 wbc->nr_to_write -= nr_pages;
2525                 if (wbc->nr_to_write <= 0)
2526                         done = true;
2527
2528                 index = next;
2529         }
2530
2531         if (!scanned && !done) {
2532                 /*
2533                  * We hit the last page and there is more work to be done: wrap
2534                  * back to the start of the file
2535                  */
2536                 scanned = true;
2537                 index = 0;
2538                 goto retry;
2539         }
2540
2541         if (saved_rc != 0)
2542                 rc = saved_rc;
2543
             /* record where the next cyclic (or unfinished whole-file) pass should resume */
2544         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2545                 mapping->writeback_index = index;
2546
2547         if (cfile)
2548                 cifsFileInfo_put(cfile);
2549         free_xid(xid);
2550         /* Indication to update ctime and mtime as close is deferred */
2551         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2552         return rc;
2553 }
2554
2555 static int
2556 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2557 {
2558         int rc;
2559         unsigned int xid;
2560
2561         xid = get_xid();
2562 /* BB add check for wbc flags */
2563         get_page(page);
2564         if (!PageUptodate(page))
2565                 cifs_dbg(FYI, "ppw - page not up to date\n");
2566
2567         /*
2568          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2569          *
2570          * A writepage() implementation always needs to do either this,
2571          * or re-dirty the page with "redirty_page_for_writepage()" in
2572          * the case of a failure.
2573          *
2574          * Just unlocking the page will cause the radix tree tag-bits
2575          * to fail to update with the state of the page correctly.
2576          */
2577         set_page_writeback(page);
2578 retry_write:
2579         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2580         if (is_retryable_error(rc)) {
2581                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2582                         goto retry_write;
2583                 redirty_page_for_writepage(wbc, page);
2584         } else if (rc != 0) {
2585                 SetPageError(page);
2586                 mapping_set_error(page->mapping, rc);
2587         } else {
2588                 SetPageUptodate(page);
2589         }
2590         end_page_writeback(page);
2591         put_page(page);
2592         free_xid(xid);
2593         return rc;
2594 }
2595
/*
 * Write @page via cifs_writepage_locked(), then unlock it.
 * Returns whatever cifs_writepage_locked() returned.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int result;

        result = cifs_writepage_locked(page, wbc);
        unlock_page(page);

        return result;
}
2602
2603 static int cifs_write_end(struct file *file, struct address_space *mapping,
2604                         loff_t pos, unsigned len, unsigned copied,
2605                         struct page *page, void *fsdata)
2606 {
2607         int rc;
2608         struct inode *inode = mapping->host;
2609         struct cifsFileInfo *cfile = file->private_data;
2610         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2611         __u32 pid;
2612
2613         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2614                 pid = cfile->pid;
2615         else
2616                 pid = current->tgid;
2617
2618         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2619                  page, pos, copied);
2620
2621         if (PageChecked(page)) {
2622                 if (copied == len)
2623                         SetPageUptodate(page);
2624                 ClearPageChecked(page);
2625         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2626                 SetPageUptodate(page);
2627
2628         if (!PageUptodate(page)) {
2629                 char *page_data;
2630                 unsigned offset = pos & (PAGE_SIZE - 1);
2631                 unsigned int xid;
2632
2633                 xid = get_xid();
2634                 /* this is probably better than directly calling
2635                    partialpage_write since in this function the file handle is
2636                    known which we might as well leverage */
2637                 /* BB check if anything else missing out of ppw
2638                    such as updating last write time */
2639                 page_data = kmap(page);
2640                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2641                 /* if (rc < 0) should we set writebehind rc? */
2642                 kunmap(page);
2643
2644                 free_xid(xid);
2645         } else {
2646                 rc = copied;
2647                 pos += copied;
2648                 set_page_dirty(page);
2649         }
2650
2651         if (rc > 0) {
2652                 spin_lock(&inode->i_lock);
2653                 if (pos > inode->i_size) {
2654                         i_size_write(inode, pos);
2655                         inode->i_blocks = (512 - 1 + pos) >> 9;
2656                 }
2657                 spin_unlock(&inode->i_lock);
2658         }
2659
2660         unlock_page(page);
2661         put_page(page);
2662         /* Indication to update ctime and mtime as close is deferred */
2663         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2664
2665         return rc;
2666 }
2667
2668 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2669                       int datasync)
2670 {
2671         unsigned int xid;
2672         int rc = 0;
2673         struct cifs_tcon *tcon;
2674         struct TCP_Server_Info *server;
2675         struct cifsFileInfo *smbfile = file->private_data;
2676         struct inode *inode = file_inode(file);
2677         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2678
2679         rc = file_write_and_wait_range(file, start, end);
2680         if (rc) {
2681                 trace_cifs_fsync_err(inode->i_ino, rc);
2682                 return rc;
2683         }
2684
2685         xid = get_xid();
2686
2687         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2688                  file, datasync);
2689
2690         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2691                 rc = cifs_zap_mapping(inode);
2692                 if (rc) {
2693                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2694                         rc = 0; /* don't care about it in fsync */
2695                 }
2696         }
2697
2698         tcon = tlink_tcon(smbfile->tlink);
2699         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2700                 server = tcon->ses->server;
2701                 if (server->ops->flush == NULL) {
2702                         rc = -ENOSYS;
2703                         goto strict_fsync_exit;
2704                 }
2705
2706                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2707                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2708                         if (smbfile) {
2709                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2710                                 cifsFileInfo_put(smbfile);
2711                         } else
2712                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2713                 } else
2714                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2715         }
2716
2717 strict_fsync_exit:
2718         free_xid(xid);
2719         return rc;
2720 }
2721
2722 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2723 {
2724         unsigned int xid;
2725         int rc = 0;
2726         struct cifs_tcon *tcon;
2727         struct TCP_Server_Info *server;
2728         struct cifsFileInfo *smbfile = file->private_data;
2729         struct inode *inode = file_inode(file);
2730         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2731
2732         rc = file_write_and_wait_range(file, start, end);
2733         if (rc) {
2734                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2735                 return rc;
2736         }
2737
2738         xid = get_xid();
2739
2740         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2741                  file, datasync);
2742
2743         tcon = tlink_tcon(smbfile->tlink);
2744         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2745                 server = tcon->ses->server;
2746                 if (server->ops->flush == NULL) {
2747                         rc = -ENOSYS;
2748                         goto fsync_exit;
2749                 }
2750
2751                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2752                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2753                         if (smbfile) {
2754                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2755                                 cifsFileInfo_put(smbfile);
2756                         } else
2757                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2758                 } else
2759                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2760         }
2761
2762 fsync_exit:
2763         free_xid(xid);
2764         return rc;
2765 }
2766
2767 /*
2768  * As file closes, flush all cached write data for this inode checking
2769  * for write behind errors.
2770  */
2771 int cifs_flush(struct file *file, fl_owner_t id)
2772 {
2773         struct inode *inode = file_inode(file);
2774         int rc = 0;
2775
2776         if (file->f_mode & FMODE_WRITE)
2777                 rc = filemap_write_and_wait(inode->i_mapping);
2778
2779         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2780         if (rc) {
2781                 /* get more nuanced writeback errors */
2782                 rc = filemap_check_wb_err(file->f_mapping, 0);
2783                 trace_cifs_flush_err(inode->i_ino, rc);
2784         }
2785         return rc;
2786 }
2787
2788 static int
2789 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2790 {
2791         int rc = 0;
2792         unsigned long i;
2793
2794         for (i = 0; i < num_pages; i++) {
2795                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2796                 if (!pages[i]) {
2797                         /*
2798                          * save number of pages we have already allocated and
2799                          * return with ENOMEM error
2800                          */
2801                         num_pages = i;
2802                         rc = -ENOMEM;
2803                         break;
2804                 }
2805         }
2806
2807         if (rc) {
2808                 for (i = 0; i < num_pages; i++)
2809                         put_page(pages[i]);
2810         }
2811         return rc;
2812 }
2813
2814 static inline
2815 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2816 {
2817         size_t num_pages;
2818         size_t clen;
2819
2820         clen = min_t(const size_t, len, wsize);
2821         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2822
2823         if (cur_len)
2824                 *cur_len = clen;
2825
2826         return num_pages;
2827 }
2828
2829 static void
2830 cifs_uncached_writedata_release(struct kref *refcount)
2831 {
2832         int i;
2833         struct cifs_writedata *wdata = container_of(refcount,
2834                                         struct cifs_writedata, refcount);
2835
2836         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2837         for (i = 0; i < wdata->nr_pages; i++)
2838                 put_page(wdata->pages[i]);
2839         cifs_writedata_release(refcount);
2840 }
2841
2842 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2843
2844 static void
2845 cifs_uncached_writev_complete(struct work_struct *work)
2846 {
2847         struct cifs_writedata *wdata = container_of(work,
2848                                         struct cifs_writedata, work);
2849         struct inode *inode = d_inode(wdata->cfile->dentry);
2850         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2851
2852         spin_lock(&inode->i_lock);
2853         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2854         if (cifsi->server_eof > inode->i_size)
2855                 i_size_write(inode, cifsi->server_eof);
2856         spin_unlock(&inode->i_lock);
2857
2858         complete(&wdata->done);
2859         collect_uncached_write_data(wdata->ctx);
2860         /* the below call can possibly free the last ref to aio ctx */
2861         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2862 }
2863
2864 static int
2865 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2866                       size_t *len, unsigned long *num_pages)
2867 {
2868         size_t save_len, copied, bytes, cur_len = *len;
2869         unsigned long i, nr_pages = *num_pages;
2870
2871         save_len = cur_len;
2872         for (i = 0; i < nr_pages; i++) {
2873                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2874                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2875                 cur_len -= copied;
2876                 /*
2877                  * If we didn't copy as much as we expected, then that
2878                  * may mean we trod into an unmapped area. Stop copying
2879                  * at that point. On the next pass through the big
2880                  * loop, we'll likely end up getting a zero-length
2881                  * write and bailing out of it.
2882                  */
2883                 if (copied < bytes)
2884                         break;
2885         }
2886         cur_len = save_len - cur_len;
2887         *len = cur_len;
2888
2889         /*
2890          * If we have no data to send, then that probably means that
2891          * the copy above failed altogether. That's most likely because
2892          * the address in the iovec was bogus. Return -EFAULT and let
2893          * the caller free anything we allocated and bail out.
2894          */
2895         if (!cur_len)
2896                 return -EFAULT;
2897
2898         /*
2899          * i + 1 now represents the number of pages we actually used in
2900          * the copy phase above.
2901          */
2902         *num_pages = i + 1;
2903         return 0;
2904 }
2905
/*
 * Re-issue an already built uncached write as one whole request.
 *
 * @wdata:      request to resend
 * @wdata_list: list the request is appended to once it has been sent
 * @ctx:        not referenced in this function
 *
 * Returns 0 when the write was sent and queued on @wdata_list.  On any
 * failure a reference on @wdata is dropped and the error is returned.
 */
2906 static int
2907 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2908         struct cifs_aio_ctx *ctx)
2909 {
2910         unsigned int wsize;
2911         struct cifs_credits credits;
2912         int rc;
2913         struct TCP_Server_Info *server = wdata->server;
2914
2915         do {
                     /*
                      * Reopen a stale handle first.  "continue" jumps to the loop
                      * condition, so an -EAGAIN from the reopen starts a new pass.
                      */
2916                 if (wdata->cfile->invalidHandle) {
2917                         rc = cifs_reopen_file(wdata->cfile, false);
2918                         if (rc == -EAGAIN)
2919                                 continue;
2920                         else if (rc)
2921                                 break;
2922                 }
2923
2924
2925                 /*
2926                  * Wait for credits to resend this wdata.
2927                  * Note: we are attempting to resend the whole wdata not in
2928                  * segments
2929                  */
2930                 do {
2931                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2932                                                 &wsize, &credits);
2933                         if (rc)
2934                                 goto fail;
2935
                             /* grant too small for the whole wdata: return it, wait 1s, retry */
2936                         if (wsize < wdata->bytes) {
2937                                 add_credits_and_wake_if(server, &credits, 0);
2938                                 msleep(1000);
2939                         }
2940                 } while (wsize < wdata->bytes);
2941                 wdata->credits = credits;
2942
2943                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2944
2945                 if (!rc) {
2946                         if (wdata->cfile->invalidHandle)
2947                                 rc = -EAGAIN;
2948                         else {
2949 #ifdef CONFIG_CIFS_SMB_DIRECT
                                     /*
                                      * Drop a memory registration left on the wdata before
                                      * sending again.  NOTE(review): presumably the send
                                      * path registers a fresh one - confirm.
                                      */
2950                                 if (wdata->mr) {
2951                                         wdata->mr->need_invalidate = true;
2952                                         smbd_deregister_mr(wdata->mr);
2953                                         wdata->mr = NULL;
2954                                 }
2955 #endif
2956                                 rc = server->ops->async_writev(wdata,
2957                                         cifs_uncached_writedata_release);
2958                         }
2959                 }
2960
2961                 /* If the write was successfully sent, we are done */
2962                 if (!rc) {
2963                         list_add_tail(&wdata->list, wdata_list);
2964                         return 0;
2965                 }
2966
2967                 /* Roll back credits and retry if needed */
2968                 add_credits_and_wake_if(server, &wdata->credits, 0);
2969         } while (rc == -EAGAIN);
2970
2971 fail:
             /* reached on any failure: drop a reference on the wdata */
2972         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2973         return rc;
2974 }
2975
2976 static int
2977 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2978                      struct cifsFileInfo *open_file,
2979                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2980                      struct cifs_aio_ctx *ctx)
2981 {
2982         int rc = 0;
2983         size_t cur_len;
2984         unsigned long nr_pages, num_pages, i;
2985         struct cifs_writedata *wdata;
2986         struct iov_iter saved_from = *from;
2987         loff_t saved_offset = offset;
2988         pid_t pid;
2989         struct TCP_Server_Info *server;
2990         struct page **pagevec;
2991         size_t start;
2992         unsigned int xid;
2993
2994         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2995                 pid = open_file->pid;
2996         else
2997                 pid = current->tgid;
2998
2999         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3000         xid = get_xid();
3001
3002         do {
3003                 unsigned int wsize;
3004                 struct cifs_credits credits_on_stack;
3005                 struct cifs_credits *credits = &credits_on_stack;
3006
3007                 if (open_file->invalidHandle) {
3008                         rc = cifs_reopen_file(open_file, false);
3009                         if (rc == -EAGAIN)
3010                                 continue;
3011                         else if (rc)
3012                                 break;
3013                 }
3014
3015                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3016                                                    &wsize, credits);
3017                 if (rc)
3018                         break;
3019
3020                 cur_len = min_t(const size_t, len, wsize);
3021
3022                 if (ctx->direct_io) {
3023                         ssize_t result;
3024
3025                         result = iov_iter_get_pages_alloc(
3026                                 from, &pagevec, cur_len, &start);
3027                         if (result < 0) {
3028                                 cifs_dbg(VFS,
3029                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3030                                          result, iov_iter_type(from),
3031                                          from->iov_offset, from->count);
3032                                 dump_stack();
3033
3034                                 rc = result;
3035                                 add_credits_and_wake_if(server, credits, 0);
3036                                 break;
3037                         }
3038                         cur_len = (size_t)result;
3039                         iov_iter_advance(from, cur_len);
3040
3041                         nr_pages =
3042                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3043
3044                         wdata = cifs_writedata_direct_alloc(pagevec,
3045                                              cifs_uncached_writev_complete);
3046                         if (!wdata) {
3047                                 rc = -ENOMEM;
3048                                 add_credits_and_wake_if(server, credits, 0);
3049                                 break;
3050                         }
3051
3052
3053                         wdata->page_offset = start;
3054                         wdata->tailsz =
3055                                 nr_pages > 1 ?
3056                                         cur_len - (PAGE_SIZE - start) -
3057                                         (nr_pages - 2) * PAGE_SIZE :
3058                                         cur_len;
3059                 } else {
3060                         nr_pages = get_numpages(wsize, len, &cur_len);
3061                         wdata = cifs_writedata_alloc(nr_pages,
3062                                              cifs_uncached_writev_complete);
3063                         if (!wdata) {
3064                                 rc = -ENOMEM;
3065                                 add_credits_and_wake_if(server, credits, 0);
3066                                 break;
3067                         }
3068
3069                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3070                         if (rc) {
3071                                 kvfree(wdata->pages);
3072                                 kfree(wdata);
3073                                 add_credits_and_wake_if(server, credits, 0);
3074                                 break;
3075                         }
3076
3077                         num_pages = nr_pages;
3078                         rc = wdata_fill_from_iovec(
3079                                 wdata, from, &cur_len, &num_pages);
3080                         if (rc) {
3081                                 for (i = 0; i < nr_pages; i++)
3082                                         put_page(wdata->pages[i]);
3083                                 kvfree(wdata->pages);
3084                                 kfree(wdata);
3085                                 add_credits_and_wake_if(server, credits, 0);
3086                                 break;
3087                         }
3088
3089                         /*
3090                          * Bring nr_pages down to the number of pages we
3091                          * actually used, and free any pages that we didn't use.
3092                          */
3093                         for ( ; nr_pages > num_pages; nr_pages--)
3094                                 put_page(wdata->pages[nr_pages - 1]);
3095
3096                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3097                 }
3098
3099                 wdata->sync_mode = WB_SYNC_ALL;
3100                 wdata->nr_pages = nr_pages;
3101                 wdata->offset = (__u64)offset;
3102                 wdata->cfile = cifsFileInfo_get(open_file);
3103                 wdata->server = server;
3104                 wdata->pid = pid;
3105                 wdata->bytes = cur_len;
3106                 wdata->pagesz = PAGE_SIZE;
3107                 wdata->credits = credits_on_stack;
3108                 wdata->ctx = ctx;
3109                 kref_get(&ctx->refcount);
3110
3111                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3112
3113                 if (!rc) {
3114                         if (wdata->cfile->invalidHandle)
3115                                 rc = -EAGAIN;
3116                         else
3117                                 rc = server->ops->async_writev(wdata,
3118                                         cifs_uncached_writedata_release);
3119                 }
3120
3121                 if (rc) {
3122                         add_credits_and_wake_if(server, &wdata->credits, 0);
3123                         kref_put(&wdata->refcount,
3124                                  cifs_uncached_writedata_release);
3125                         if (rc == -EAGAIN) {
3126                                 *from = saved_from;
3127                                 iov_iter_advance(from, offset - saved_offset);
3128                                 continue;
3129                         }
3130                         break;
3131                 }
3132
3133                 list_add_tail(&wdata->list, wdata_list);
3134                 offset += cur_len;
3135                 len -= cur_len;
3136         } while (len > 0);
3137
3138         free_xid(xid);
3139         return rc;
3140 }
3141
/*
 * Gather the results of the uncached write requests queued on ctx->list.
 *
 * Called with no locks held; takes ctx->aio_mutex for the duration of the
 * list walk. Returns early, without notifying anybody, when the list is
 * empty or when a request that has not been signalled done is reached
 * while no error has been recorded. Otherwise every request is unlinked
 * and released, ctx->rc is set to the byte total or to the error, and the
 * issuer is notified through ki_complete() (when ctx->iocb has one) or
 * through complete(&ctx->done).
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
        struct cifs_writedata *wdata, *tmp;
        struct cifs_tcon *tcon;
        struct cifs_sb_info *cifs_sb;
        struct dentry *dentry = ctx->cfile->dentry;
        ssize_t rc;

        tcon = tlink_tcon(ctx->cfile->tlink);
        cifs_sb = CIFS_SB(dentry->d_sb);

        mutex_lock(&ctx->aio_mutex);

        /* nothing is queued on the list, so there is nothing to collect */
        if (list_empty(&ctx->list)) {
                mutex_unlock(&ctx->aio_mutex);
                return;
        }

        /* start from whatever result has already been recorded in the ctx */
        rc = ctx->rc;
        /*
         * Wait for and collect replies for any successful sends in order of
         * increasing offset. Once an error is hit, then return without waiting
         * for any more replies.
         */
restart_loop:
        list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
                if (!rc) {
                        /*
                         * This request has not been signalled done yet: stop
                         * here and leave it (and everything after it) queued.
                         */
                        if (!try_wait_for_completion(&wdata->done)) {
                                mutex_unlock(&ctx->aio_mutex);
                                return;
                        }

                        /* only successful requests count towards the total */
                        if (wdata->result)
                                rc = wdata->result;
                        else
                                ctx->total_len += wdata->bytes;

                        /* resend call if it's a retryable error */
                        if (rc == -EAGAIN) {
                                struct list_head tmp_list;
                                struct iov_iter tmp_from = ctx->iter;

                                INIT_LIST_HEAD(&tmp_list);
                                list_del_init(&wdata->list);

                                if (ctx->direct_io)
                                        /* hand the same wdata to the resend helper */
                                        rc = cifs_resend_wdata(
                                                wdata, &tmp_list, ctx);
                                else {
                                        /*
                                         * Position a private copy of ctx->iter
                                         * at this request's data, issue fresh
                                         * requests for the same range, and
                                         * drop our reference on the old wdata.
                                         */
                                        iov_iter_advance(&tmp_from,
                                                 wdata->offset - ctx->pos);

                                        rc = cifs_write_from_iter(wdata->offset,
                                                wdata->bytes, &tmp_from,
                                                ctx->cfile, cifs_sb, &tmp_list,
                                                ctx);

                                        kref_put(&wdata->refcount,
                                                cifs_uncached_writedata_release);
                                }

                                /*
                                 * The list changed under the iterator: put the
                                 * new requests at the head of ctx->list and
                                 * walk it again from the start.
                                 */
                                list_splice(&tmp_list, &ctx->list);
                                goto restart_loop;
                        }
                }
                /*
                 * The request has been accounted for, or an error is already
                 * recorded and the remaining requests are dropped without
                 * waiting for them: unlink it and release this reference.
                 */
                list_del_init(&wdata->list);
                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
        }

        cifs_stats_bytes_written(tcon, ctx->total_len);
        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

        /* final result: number of bytes written on success, else the error */
        ctx->rc = (rc == 0) ? ctx->total_len : rc;

        mutex_unlock(&ctx->aio_mutex);

        /* notify the issuer: callback if the iocb has one, else wake the waiter */
        if (ctx->iocb && ctx->iocb->ki_complete)
                ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
        else
                complete(&ctx->done);
}
3223
3224 static ssize_t __cifs_writev(
3225         struct kiocb *iocb, struct iov_iter *from, bool direct)
3226 {
3227         struct file *file = iocb->ki_filp;
3228         ssize_t total_written = 0;
3229         struct cifsFileInfo *cfile;
3230         struct cifs_tcon *tcon;
3231         struct cifs_sb_info *cifs_sb;
3232         struct cifs_aio_ctx *ctx;
3233         struct iov_iter saved_from = *from;
3234         size_t len = iov_iter_count(from);
3235         int rc;
3236
3237         /*
3238          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3239          * In this case, fall back to non-direct write function.
3240          * this could be improved by getting pages directly in ITER_KVEC
3241          */
3242         if (direct && iov_iter_is_kvec(from)) {
3243                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3244                 direct = false;
3245         }
3246
3247         rc = generic_write_checks(iocb, from);
3248         if (rc <= 0)
3249                 return rc;
3250
3251         cifs_sb = CIFS_FILE_SB(file);
3252         cfile = file->private_data;
3253         tcon = tlink_tcon(cfile->tlink);
3254
3255         if (!tcon->ses->server->ops->async_writev)
3256                 return -ENOSYS;
3257
3258         ctx = cifs_aio_ctx_alloc();
3259         if (!ctx)
3260                 return -ENOMEM;
3261
3262         ctx->cfile = cifsFileInfo_get(cfile);
3263
3264         if (!is_sync_kiocb(iocb))
3265                 ctx->iocb = iocb;
3266
3267         ctx->pos = iocb->ki_pos;
3268
3269         if (direct) {
3270                 ctx->direct_io = true;
3271                 ctx->iter = *from;
3272                 ctx->len = len;
3273         } else {
3274                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3275                 if (rc) {
3276                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3277                         return rc;
3278                 }
3279         }
3280
3281         /* grab a lock here due to read response handlers can access ctx */
3282         mutex_lock(&ctx->aio_mutex);
3283
3284         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3285                                   cfile, cifs_sb, &ctx->list, ctx);
3286
3287         /*
3288          * If at least one write was successfully sent, then discard any rc
3289          * value from the later writes. If the other write succeeds, then
3290          * we'll end up returning whatever was written. If it fails, then
3291          * we'll get a new rc value from that.
3292          */
3293         if (!list_empty(&ctx->list))
3294                 rc = 0;
3295
3296         mutex_unlock(&ctx->aio_mutex);
3297
3298         if (rc) {
3299                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3300                 return rc;
3301         }
3302
3303         if (!is_sync_kiocb(iocb)) {
3304                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3305                 return -EIOCBQUEUED;
3306         }
3307
3308         rc = wait_for_completion_killable(&ctx->done);
3309         if (rc) {
3310                 mutex_lock(&ctx->aio_mutex);
3311                 ctx->rc = rc = -EINTR;
3312                 total_written = ctx->total_len;
3313                 mutex_unlock(&ctx->aio_mutex);
3314         } else {
3315                 rc = ctx->rc;
3316                 total_written = ctx->total_len;
3317         }
3318
3319         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3320
3321         if (unlikely(!total_written))
3322                 return rc;
3323
3324         iocb->ki_pos += total_written;
3325         return total_written;
3326 }
3327
3328 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3329 {
3330         return __cifs_writev(iocb, from, true);
3331 }
3332
3333 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3334 {
3335         return __cifs_writev(iocb, from, false);
3336 }
3337
3338 static ssize_t
3339 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3340 {
3341         struct file *file = iocb->ki_filp;
3342         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3343         struct inode *inode = file->f_mapping->host;
3344         struct cifsInodeInfo *cinode = CIFS_I(inode);
3345         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3346         ssize_t rc;
3347
3348         inode_lock(inode);
3349         /*
3350          * We need to hold the sem to be sure nobody modifies lock list
3351          * with a brlock that prevents writing.
3352          */
3353         down_read(&cinode->lock_sem);
3354
3355         rc = generic_write_checks(iocb, from);
3356         if (rc <= 0)
3357                 goto out;
3358
3359         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3360                                      server->vals->exclusive_lock_type, 0,
3361                                      NULL, CIFS_WRITE_OP))
3362                 rc = __generic_file_write_iter(iocb, from);
3363         else
3364                 rc = -EACCES;
3365 out:
3366         up_read(&cinode->lock_sem);
3367         inode_unlock(inode);
3368
3369         if (rc > 0)
3370                 rc = generic_write_sync(iocb, rc);
3371         return rc;
3372 }
3373
3374 ssize_t
3375 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3376 {
3377         struct inode *inode = file_inode(iocb->ki_filp);
3378         struct cifsInodeInfo *cinode = CIFS_I(inode);
3379         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3380         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3381                                                 iocb->ki_filp->private_data;
3382         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3383         ssize_t written;
3384
3385         written = cifs_get_writer(cinode);
3386         if (written)
3387                 return written;
3388
3389         if (CIFS_CACHE_WRITE(cinode)) {
3390                 if (cap_unix(tcon->ses) &&
3391                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3392                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3393                         written = generic_file_write_iter(iocb, from);
3394                         goto out;
3395                 }
3396                 written = cifs_writev(iocb, from);
3397                 goto out;
3398         }
3399         /*
3400          * For non-oplocked files in strict cache mode we need to write the data
3401          * to the server exactly from the pos to pos+len-1 rather than flush all
3402          * affected pages because it may cause a error with mandatory locks on
3403          * these pages but not on the region from pos to ppos+len-1.
3404          */
3405         written = cifs_user_writev(iocb, from);
3406         if (CIFS_CACHE_READ(cinode)) {
3407                 /*
3408                  * We have read level caching and we have just sent a write
3409                  * request to the server thus making data in the cache stale.
3410                  * Zap the cache and set oplock/lease level to NONE to avoid
3411                  * reading stale data from the cache. All subsequent read
3412                  * operations will read new data from the server.
3413                  */
3414                 cifs_zap_mapping(inode);
3415                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3416                          inode);
3417                 cinode->oplock = 0;
3418         }
3419 out:
3420         cifs_put_writer(cinode);
3421         return written;
3422 }
3423
3424 static struct cifs_readdata *
3425 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3426 {
3427         struct cifs_readdata *rdata;
3428
3429         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3430         if (rdata != NULL) {
3431                 rdata->pages = pages;
3432                 kref_init(&rdata->refcount);
3433                 INIT_LIST_HEAD(&rdata->list);
3434                 init_completion(&rdata->done);
3435                 INIT_WORK(&rdata->work, complete);
3436         }
3437
3438         return rdata;
3439 }
3440
3441 static struct cifs_readdata *
3442 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3443 {
3444         struct page **pages =
3445                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3446         struct cifs_readdata *ret = NULL;
3447
3448         if (pages) {
3449                 ret = cifs_readdata_direct_alloc(pages, complete);
3450                 if (!ret)
3451                         kfree(pages);
3452         }
3453
3454         return ret;
3455 }
3456
3457 void
3458 cifs_readdata_release(struct kref *refcount)
3459 {
3460         struct cifs_readdata *rdata = container_of(refcount,
3461                                         struct cifs_readdata, refcount);
3462 #ifdef CONFIG_CIFS_SMB_DIRECT
3463         if (rdata->mr) {
3464                 smbd_deregister_mr(rdata->mr);
3465                 rdata->mr = NULL;
3466         }
3467 #endif
3468         if (rdata->cfile)
3469                 cifsFileInfo_put(rdata->cfile);
3470
3471         kvfree(rdata->pages);
3472         kfree(rdata);
3473 }
3474
3475 static int
3476 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3477 {
3478         int rc = 0;
3479         struct page *page;
3480         unsigned int i;
3481
3482         for (i = 0; i < nr_pages; i++) {
3483                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3484                 if (!page) {
3485                         rc = -ENOMEM;
3486                         break;
3487                 }
3488                 rdata->pages[i] = page;
3489         }
3490
3491         if (rc) {
3492                 unsigned int nr_page_failed = i;
3493
3494                 for (i = 0; i < nr_page_failed; i++) {
3495                         put_page(rdata->pages[i]);
3496                         rdata->pages[i] = NULL;
3497                 }
3498         }
3499         return rc;
3500 }
3501
3502 static void
3503 cifs_uncached_readdata_release(struct kref *refcount)
3504 {
3505         struct cifs_readdata *rdata = container_of(refcount,
3506                                         struct cifs_readdata, refcount);
3507         unsigned int i;
3508
3509         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3510         for (i = 0; i < rdata->nr_pages; i++) {
3511                 put_page(rdata->pages[i]);
3512         }
3513         cifs_readdata_release(refcount);
3514 }
3515
3516 /**
3517  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3518  * @rdata:      the readdata response with list of pages holding data
3519  * @iter:       destination for our data
3520  *
3521  * This function copies data from a list of pages in a readdata response into
3522  * an array of iovecs. It will first calculate where the data should go
3523  * based on the info in the readdata and then copy the data into that spot.
3524  */
3525 static int
3526 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3527 {
3528         size_t remaining = rdata->got_bytes;
3529         unsigned int i;
3530
3531         for (i = 0; i < rdata->nr_pages; i++) {
3532                 struct page *page = rdata->pages[i];
3533                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3534                 size_t written;
3535
3536                 if (unlikely(iov_iter_is_pipe(iter))) {
3537                         void *addr = kmap_atomic(page);
3538
3539                         written = copy_to_iter(addr, copy, iter);
3540                         kunmap_atomic(addr);
3541                 } else
3542                         written = copy_page_to_iter(page, 0, copy, iter);
3543                 remaining -= written;
3544                 if (written < copy && iov_iter_count(iter) > 0)
3545                         break;
3546         }
3547         return remaining ? -EFAULT : 0;
3548 }
3549
3550 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3551
3552 static void
3553 cifs_uncached_readv_complete(struct work_struct *work)
3554 {
3555         struct cifs_readdata *rdata = container_of(work,
3556                                                 struct cifs_readdata, work);
3557
3558         complete(&rdata->done);
3559         collect_uncached_read_data(rdata->ctx);
3560         /* the below call can possibly free the last ref to aio ctx */
3561         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3562 }
3563
3564 static int
3565 uncached_fill_pages(struct TCP_Server_Info *server,
3566                     struct cifs_readdata *rdata, struct iov_iter *iter,
3567                     unsigned int len)
3568 {
3569         int result = 0;
3570         unsigned int i;
3571         unsigned int nr_pages = rdata->nr_pages;
3572         unsigned int page_offset = rdata->page_offset;
3573
3574         rdata->got_bytes = 0;
3575         rdata->tailsz = PAGE_SIZE;
3576         for (i = 0; i < nr_pages; i++) {
3577                 struct page *page = rdata->pages[i];
3578                 size_t n;
3579                 unsigned int segment_size = rdata->pagesz;
3580
3581                 if (i == 0)
3582                         segment_size -= page_offset;
3583                 else
3584                         page_offset = 0;
3585
3586
3587                 if (len <= 0) {
3588                         /* no need to hold page hostage */
3589                         rdata->pages[i] = NULL;
3590                         rdata->nr_pages--;
3591                         put_page(page);
3592                         continue;
3593                 }
3594
3595                 n = len;
3596                 if (len >= segment_size)
3597                         /* enough data to fill the page */
3598                         n = segment_size;
3599                 else
3600                         rdata->tailsz = len;
3601                 len -= n;
3602
3603                 if (iter)
3604                         result = copy_page_from_iter(
3605                                         page, page_offset, n, iter);
3606 #ifdef CONFIG_CIFS_SMB_DIRECT
3607                 else if (rdata->mr)
3608                         result = n;
3609 #endif
3610                 else
3611                         result = cifs_read_page_from_socket(
3612                                         server, page, page_offset, n);
3613                 if (result < 0)
3614                         break;
3615
3616                 rdata->got_bytes += result;
3617         }
3618
3619         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3620                                                 rdata->got_bytes : result;
3621 }
3622
3623 static int
3624 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3625                               struct cifs_readdata *rdata, unsigned int len)
3626 {
3627         return uncached_fill_pages(server, rdata, NULL, len);
3628 }
3629
3630 static int
3631 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3632                               struct cifs_readdata *rdata,
3633                               struct iov_iter *iter)
3634 {
3635         return uncached_fill_pages(server, rdata, iter, iter->count);
3636 }
3637
/*
 * Resend an existing read request as a whole.
 *
 * @rdata:      request to resend; its server, cfile and bytes are reused
 * @rdata_list: list the request is appended to once it has been sent
 * @ctx:        aio context (not referenced by this function's body)
 *
 * Returns 0 when the request was sent and queued on @rdata_list. On any
 * failure a reference on @rdata is dropped and the error is returned.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
                        struct list_head *rdata_list,
                        struct cifs_aio_ctx *ctx)
{
        unsigned int rsize;
        struct cifs_credits credits;
        int rc;
        struct TCP_Server_Info *server;

        /* XXX: should we pick a new channel here? */
        server = rdata->server;

        do {
                /* the handle must be valid again before anything is sent */
                if (rdata->cfile->invalidHandle) {
                        rc = cifs_reopen_file(rdata->cfile, true);
                        if (rc == -EAGAIN)
                                continue;
                        else if (rc)
                                break;
                }

                /*
                 * Wait for credits to resend this rdata.
                 * Note: we are attempting to resend the whole rdata not in
                 * segments
                 */
                do {
                        rc = server->ops->wait_mtu_credits(server, rdata->bytes,
                                                &rsize, &credits);

                        if (rc)
                                goto fail;

                        /*
                         * Granted less than the whole request needs: hand
                         * the credits back, sleep for a second and ask again.
                         */
                        if (rsize < rdata->bytes) {
                                add_credits_and_wake_if(server, &credits, 0);
                                msleep(1000);
                        }
                } while (rsize < rdata->bytes);
                rdata->credits = credits;

                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
                if (!rc) {
                        /* the handle may have gone stale while we waited */
                        if (rdata->cfile->invalidHandle)
                                rc = -EAGAIN;
                        else {
#ifdef CONFIG_CIFS_SMB_DIRECT
                                /* drop the old memory registration before resending */
                                if (rdata->mr) {
                                        rdata->mr->need_invalidate = true;
                                        smbd_deregister_mr(rdata->mr);
                                        rdata->mr = NULL;
                                }
#endif
                                rc = server->ops->async_readv(rdata);
                        }
                }

                /* If the read was successfully sent, we are done */
                if (!rc) {
                        /* Add to aio pending list */
                        list_add_tail(&rdata->list, rdata_list);
                        return 0;
                }

                /* Roll back credits and retry if needed */
                add_credits_and_wake_if(server, &rdata->credits, 0);
        } while (rc == -EAGAIN);

fail:
        /* the request could not be resent: drop a reference on it */
        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        return rc;
}
3709
3710 static int
3711 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3712                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3713                      struct cifs_aio_ctx *ctx)
3714 {
3715         struct cifs_readdata *rdata;
3716         unsigned int npages, rsize;
3717         struct cifs_credits credits_on_stack;
3718         struct cifs_credits *credits = &credits_on_stack;
3719         size_t cur_len;
3720         int rc;
3721         pid_t pid;
3722         struct TCP_Server_Info *server;
3723         struct page **pagevec;
3724         size_t start;
3725         struct iov_iter direct_iov = ctx->iter;
3726
3727         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3728
3729         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3730                 pid = open_file->pid;
3731         else
3732                 pid = current->tgid;
3733
3734         if (ctx->direct_io)
3735                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3736
3737         do {
3738                 if (open_file->invalidHandle) {
3739                         rc = cifs_reopen_file(open_file, true);
3740                         if (rc == -EAGAIN)
3741                                 continue;
3742                         else if (rc)
3743                                 break;
3744                 }
3745
3746                 if (cifs_sb->ctx->rsize == 0)
3747                         cifs_sb->ctx->rsize =
3748                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3749                                                              cifs_sb->ctx);
3750
3751                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3752                                                    &rsize, credits);
3753                 if (rc)
3754                         break;
3755
3756                 cur_len = min_t(const size_t, len, rsize);
3757
3758                 if (ctx->direct_io) {
3759                         ssize_t result;
3760
3761                         result = iov_iter_get_pages_alloc(
3762                                         &direct_iov, &pagevec,
3763                                         cur_len, &start);
3764                         if (result < 0) {
3765                                 cifs_dbg(VFS,
3766                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3767                                          result, iov_iter_type(&direct_iov),
3768                                          direct_iov.iov_offset,
3769                                          direct_iov.count);
3770                                 dump_stack();
3771
3772                                 rc = result;
3773                                 add_credits_and_wake_if(server, credits, 0);
3774                                 break;
3775                         }
3776                         cur_len = (size_t)result;
3777                         iov_iter_advance(&direct_iov, cur_len);
3778
3779                         rdata = cifs_readdata_direct_alloc(
3780                                         pagevec, cifs_uncached_readv_complete);
3781                         if (!rdata) {
3782                                 add_credits_and_wake_if(server, credits, 0);
3783                                 rc = -ENOMEM;
3784                                 break;
3785                         }
3786
3787                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3788                         rdata->page_offset = start;
3789                         rdata->tailsz = npages > 1 ?
3790                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3791                                 cur_len;
3792
3793                 } else {
3794
3795                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3796                         /* allocate a readdata struct */
3797                         rdata = cifs_readdata_alloc(npages,
3798                                             cifs_uncached_readv_complete);
3799                         if (!rdata) {
3800                                 add_credits_and_wake_if(server, credits, 0);
3801                                 rc = -ENOMEM;
3802                                 break;
3803                         }
3804
3805                         rc = cifs_read_allocate_pages(rdata, npages);
3806                         if (rc) {
3807                                 kvfree(rdata->pages);
3808                                 kfree(rdata);
3809                                 add_credits_and_wake_if(server, credits, 0);
3810                                 break;
3811                         }
3812
3813                         rdata->tailsz = PAGE_SIZE;
3814                 }
3815
3816                 rdata->server = server;
3817                 rdata->cfile = cifsFileInfo_get(open_file);
3818                 rdata->nr_pages = npages;
3819                 rdata->offset = offset;
3820                 rdata->bytes = cur_len;
3821                 rdata->pid = pid;
3822                 rdata->pagesz = PAGE_SIZE;
3823                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3824                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3825                 rdata->credits = credits_on_stack;
3826                 rdata->ctx = ctx;
3827                 kref_get(&ctx->refcount);
3828
3829                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3830
3831                 if (!rc) {
3832                         if (rdata->cfile->invalidHandle)
3833                                 rc = -EAGAIN;
3834                         else
3835                                 rc = server->ops->async_readv(rdata);
3836                 }
3837
3838                 if (rc) {
3839                         add_credits_and_wake_if(server, &rdata->credits, 0);
3840                         kref_put(&rdata->refcount,
3841                                 cifs_uncached_readdata_release);
3842                         if (rc == -EAGAIN) {
3843                                 iov_iter_revert(&direct_iov, cur_len);
3844                                 continue;
3845                         }
3846                         break;
3847                 }
3848
3849                 list_add_tail(&rdata->list, rdata_list);
3850                 offset += cur_len;
3851                 len -= cur_len;
3852         } while (len > 0);
3853
3854         return rc;
3855 }
3856
/*
 * Gather the results of the uncached read requests queued on ctx->list.
 *
 * Takes ctx->aio_mutex for the duration of the list walk. Returns early,
 * without notifying anybody, when the list is empty or when a request that
 * has not been signalled done is reached while no error has been recorded.
 * Otherwise every request is unlinked and released, ctx->rc is set to the
 * byte total or to the error, and the issuer is notified through
 * ki_complete() (when ctx->iocb has one) or through complete(&ctx->done).
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
        struct cifs_readdata *rdata, *tmp;
        struct iov_iter *to = &ctx->iter;
        struct cifs_sb_info *cifs_sb;
        int rc;

        cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

        mutex_lock(&ctx->aio_mutex);

        /* nothing is queued on the list, so there is nothing to collect */
        if (list_empty(&ctx->list)) {
                mutex_unlock(&ctx->aio_mutex);
                return;
        }

        /* start from whatever result has already been recorded in the ctx */
        rc = ctx->rc;
        /* the loop below should proceed in the order of increasing offsets */
again:
        list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
                if (!rc) {
                        /*
                         * This request has not been signalled done yet: stop
                         * here and leave it (and everything after it) queued.
                         */
                        if (!try_wait_for_completion(&rdata->done)) {
                                mutex_unlock(&ctx->aio_mutex);
                                return;
                        }

                        if (rdata->result == -EAGAIN) {
                                /* resend call if it's a retryable error */
                                struct list_head tmp_list;
                                unsigned int got_bytes = rdata->got_bytes;

                                list_del_init(&rdata->list);
                                INIT_LIST_HEAD(&tmp_list);

                                /*
                                 * Got a part of data and then reconnect has
                                 * happened -- fill the buffer and continue
                                 * reading.
                                 */
                                if (got_bytes && got_bytes < rdata->bytes) {
                                        rc = 0;
                                        if (!ctx->direct_io)
                                                rc = cifs_readdata_to_iov(rdata, to);
                                        /*
                                         * The partial copy failed: drop this
                                         * request and carry on with rc set,
                                         * so the rest of the list is released
                                         * without waiting.
                                         */
                                        if (rc) {
                                                kref_put(&rdata->refcount,
                                                        cifs_uncached_readdata_release);
                                                continue;
                                        }
                                }

                                if (ctx->direct_io) {
                                        /*
                                         * Re-use rdata as this is a
                                         * direct I/O
                                         */
                                        rc = cifs_resend_rdata(
                                                rdata,
                                                &tmp_list, ctx);
                                } else {
                                        /*
                                         * Request only the part that is still
                                         * missing, then drop our reference on
                                         * the old rdata.
                                         */
                                        rc = cifs_send_async_read(
                                                rdata->offset + got_bytes,
                                                rdata->bytes - got_bytes,
                                                rdata->cfile, cifs_sb,
                                                &tmp_list, ctx);

                                        kref_put(&rdata->refcount,
                                                cifs_uncached_readdata_release);
                                }

                                /*
                                 * The list changed under the iterator: put the
                                 * new requests at the head of ctx->list and
                                 * walk it again from the start.
                                 */
                                list_splice(&tmp_list, &ctx->list);

                                goto again;
                        } else if (rdata->result)
                                rc = rdata->result;
                        else if (!ctx->direct_io)
                                rc = cifs_readdata_to_iov(rdata, to);

                        /* if there was a short read -- discard anything left */
                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
                                rc = -ENODATA;

                        ctx->total_len += rdata->got_bytes;
                }
                /*
                 * The request has been accounted for, or an error is already
                 * recorded and the remaining requests are dropped without
                 * waiting for them: unlink it and release this reference.
                 */
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }

        /* non-direct I/O: the total is whatever was consumed from the iterator */
        if (!ctx->direct_io)
                ctx->total_len = ctx->len - iov_iter_count(to);

        /* mask nodata case */
        if (rc == -ENODATA)
                rc = 0;

        /* final result: number of bytes read on success, else the error */
        ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

        mutex_unlock(&ctx->aio_mutex);

        /* notify the issuer: callback if the iocb has one, else wake the waiter */
        if (ctx->iocb && ctx->iocb->ki_complete)
                ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
        else
                complete(&ctx->done);
}
3961
3962 static ssize_t __cifs_readv(
3963         struct kiocb *iocb, struct iov_iter *to, bool direct)
3964 {
3965         size_t len;
3966         struct file *file = iocb->ki_filp;
3967         struct cifs_sb_info *cifs_sb;
3968         struct cifsFileInfo *cfile;
3969         struct cifs_tcon *tcon;
3970         ssize_t rc, total_read = 0;
3971         loff_t offset = iocb->ki_pos;
3972         struct cifs_aio_ctx *ctx;
3973
3974         /*
3975          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3976          * fall back to data copy read path
3977          * this could be improved by getting pages directly in ITER_KVEC
3978          */
3979         if (direct && iov_iter_is_kvec(to)) {
3980                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3981                 direct = false;
3982         }
3983
3984         len = iov_iter_count(to);
3985         if (!len)
3986                 return 0;
3987
3988         cifs_sb = CIFS_FILE_SB(file);
3989         cfile = file->private_data;
3990         tcon = tlink_tcon(cfile->tlink);
3991
3992         if (!tcon->ses->server->ops->async_readv)
3993                 return -ENOSYS;
3994
3995         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3996                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3997
3998         ctx = cifs_aio_ctx_alloc();
3999         if (!ctx)
4000                 return -ENOMEM;
4001
4002         ctx->cfile = cifsFileInfo_get(cfile);
4003
4004         if (!is_sync_kiocb(iocb))
4005                 ctx->iocb = iocb;
4006
4007         if (iter_is_iovec(to))
4008                 ctx->should_dirty = true;
4009
4010         if (direct) {
4011                 ctx->pos = offset;
4012                 ctx->direct_io = true;
4013                 ctx->iter = *to;
4014                 ctx->len = len;
4015         } else {
4016                 rc = setup_aio_ctx_iter(ctx, to, READ);
4017                 if (rc) {
4018                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4019                         return rc;
4020                 }
4021                 len = ctx->len;
4022         }
4023
4024         /* grab a lock here due to read response handlers can access ctx */
4025         mutex_lock(&ctx->aio_mutex);
4026
4027         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4028
4029         /* if at least one read request send succeeded, then reset rc */
4030         if (!list_empty(&ctx->list))
4031                 rc = 0;
4032
4033         mutex_unlock(&ctx->aio_mutex);
4034
4035         if (rc) {
4036                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4037                 return rc;
4038         }
4039
4040         if (!is_sync_kiocb(iocb)) {
4041                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4042                 return -EIOCBQUEUED;
4043         }
4044
4045         rc = wait_for_completion_killable(&ctx->done);
4046         if (rc) {
4047                 mutex_lock(&ctx->aio_mutex);
4048                 ctx->rc = rc = -EINTR;
4049                 total_read = ctx->total_len;
4050                 mutex_unlock(&ctx->aio_mutex);
4051         } else {
4052                 rc = ctx->rc;
4053                 total_read = ctx->total_len;
4054         }
4055
4056         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4057
4058         if (total_read) {
4059                 iocb->ki_pos += total_read;
4060                 return total_read;
4061         }
4062         return rc;
4063 }
4064
4065 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4066 {
4067         return __cifs_readv(iocb, to, true);
4068 }
4069
4070 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4071 {
4072         return __cifs_readv(iocb, to, false);
4073 }
4074
4075 ssize_t
4076 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4077 {
4078         struct inode *inode = file_inode(iocb->ki_filp);
4079         struct cifsInodeInfo *cinode = CIFS_I(inode);
4080         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4081         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4082                                                 iocb->ki_filp->private_data;
4083         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4084         int rc = -EACCES;
4085
4086         /*
4087          * In strict cache mode we need to read from the server all the time
4088          * if we don't have level II oplock because the server can delay mtime
4089          * change - so we can't make a decision about inode invalidating.
4090          * And we can also fail with pagereading if there are mandatory locks
4091          * on pages affected by this read but not on the region from pos to
4092          * pos+len-1.
4093          */
4094         if (!CIFS_CACHE_READ(cinode))
4095                 return cifs_user_readv(iocb, to);
4096
4097         if (cap_unix(tcon->ses) &&
4098             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4099             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4100                 return generic_file_read_iter(iocb, to);
4101
4102         /*
4103          * We need to hold the sem to be sure nobody modifies lock list
4104          * with a brlock that prevents reading.
4105          */
4106         down_read(&cinode->lock_sem);
4107         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4108                                      tcon->ses->server->vals->shared_lock_type,
4109                                      0, NULL, CIFS_READ_OP))
4110                 rc = generic_file_read_iter(iocb, to);
4111         up_read(&cinode->lock_sem);
4112         return rc;
4113 }
4114
4115 static ssize_t
4116 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4117 {
4118         int rc = -EACCES;
4119         unsigned int bytes_read = 0;
4120         unsigned int total_read;
4121         unsigned int current_read_size;
4122         unsigned int rsize;
4123         struct cifs_sb_info *cifs_sb;
4124         struct cifs_tcon *tcon;
4125         struct TCP_Server_Info *server;
4126         unsigned int xid;
4127         char *cur_offset;
4128         struct cifsFileInfo *open_file;
4129         struct cifs_io_parms io_parms = {0};
4130         int buf_type = CIFS_NO_BUFFER;
4131         __u32 pid;
4132
4133         xid = get_xid();
4134         cifs_sb = CIFS_FILE_SB(file);
4135
4136         /* FIXME: set up handlers for larger reads and/or convert to async */
4137         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4138
4139         if (file->private_data == NULL) {
4140                 rc = -EBADF;
4141                 free_xid(xid);
4142                 return rc;
4143         }
4144         open_file = file->private_data;
4145         tcon = tlink_tcon(open_file->tlink);
4146         server = cifs_pick_channel(tcon->ses);
4147
4148         if (!server->ops->sync_read) {
4149                 free_xid(xid);
4150                 return -ENOSYS;
4151         }
4152
4153         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4154                 pid = open_file->pid;
4155         else
4156                 pid = current->tgid;
4157
4158         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4159                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4160
4161         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4162              total_read += bytes_read, cur_offset += bytes_read) {
4163                 do {
4164                         current_read_size = min_t(uint, read_size - total_read,
4165                                                   rsize);
4166                         /*
4167                          * For windows me and 9x we do not want to request more
4168                          * than it negotiated since it will refuse the read
4169                          * then.
4170                          */
4171                         if (!(tcon->ses->capabilities &
4172                                 tcon->ses->server->vals->cap_large_files)) {
4173                                 current_read_size = min_t(uint,
4174                                         current_read_size, CIFSMaxBufSize);
4175                         }
4176                         if (open_file->invalidHandle) {
4177                                 rc = cifs_reopen_file(open_file, true);
4178                                 if (rc != 0)
4179                                         break;
4180                         }
4181                         io_parms.pid = pid;
4182                         io_parms.tcon = tcon;
4183                         io_parms.offset = *offset;
4184                         io_parms.length = current_read_size;
4185                         io_parms.server = server;
4186                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4187                                                     &bytes_read, &cur_offset,
4188                                                     &buf_type);
4189                 } while (rc == -EAGAIN);
4190
4191                 if (rc || (bytes_read == 0)) {
4192                         if (total_read) {
4193                                 break;
4194                         } else {
4195                                 free_xid(xid);
4196                                 return rc;
4197                         }
4198                 } else {
4199                         cifs_stats_bytes_read(tcon, total_read);
4200                         *offset += bytes_read;
4201                 }
4202         }
4203         free_xid(xid);
4204         return total_read;
4205 }
4206
4207 /*
4208  * If the page is mmap'ed into a process' page tables, then we need to make
4209  * sure that it doesn't change while being written back.
4210  */
4211 static vm_fault_t
4212 cifs_page_mkwrite(struct vm_fault *vmf)
4213 {
4214         struct page *page = vmf->page;
4215
4216         /* Wait for the page to be written to the cache before we allow it to
4217          * be modified.  We then assume the entire page will need writing back.
4218          */
4219 #ifdef CONFIG_CIFS_FSCACHE
4220         if (PageFsCache(page) &&
4221             wait_on_page_fscache_killable(page) < 0)
4222                 return VM_FAULT_RETRY;
4223 #endif
4224
4225         wait_on_page_writeback(page);
4226
4227         if (lock_page_killable(page) < 0)
4228                 return VM_FAULT_RETRY;
4229         return VM_FAULT_LOCKED;
4230 }
4231
4232 static const struct vm_operations_struct cifs_file_vm_ops = {
4233         .fault = filemap_fault,
4234         .map_pages = filemap_map_pages,
4235         .page_mkwrite = cifs_page_mkwrite,
4236 };
4237
4238 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4239 {
4240         int xid, rc = 0;
4241         struct inode *inode = file_inode(file);
4242
4243         xid = get_xid();
4244
4245         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4246                 rc = cifs_zap_mapping(inode);
4247         if (!rc)
4248                 rc = generic_file_mmap(file, vma);
4249         if (!rc)
4250                 vma->vm_ops = &cifs_file_vm_ops;
4251
4252         free_xid(xid);
4253         return rc;
4254 }
4255
4256 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4257 {
4258         int rc, xid;
4259
4260         xid = get_xid();
4261
4262         rc = cifs_revalidate_file(file);
4263         if (rc)
4264                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4265                          rc);
4266         if (!rc)
4267                 rc = generic_file_mmap(file, vma);
4268         if (!rc)
4269                 vma->vm_ops = &cifs_file_vm_ops;
4270
4271         free_xid(xid);
4272         return rc;
4273 }
4274
4275 static void
4276 cifs_readv_complete(struct work_struct *work)
4277 {
4278         unsigned int i, got_bytes;
4279         struct cifs_readdata *rdata = container_of(work,
4280                                                 struct cifs_readdata, work);
4281
4282         got_bytes = rdata->got_bytes;
4283         for (i = 0; i < rdata->nr_pages; i++) {
4284                 struct page *page = rdata->pages[i];
4285
4286                 if (rdata->result == 0 ||
4287                     (rdata->result == -EAGAIN && got_bytes)) {
4288                         flush_dcache_page(page);
4289                         SetPageUptodate(page);
4290                 } else
4291                         SetPageError(page);
4292
4293                 if (rdata->result == 0 ||
4294                     (rdata->result == -EAGAIN && got_bytes))
4295                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4296
4297                 unlock_page(page);
4298
4299                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4300
4301                 put_page(page);
4302                 rdata->pages[i] = NULL;
4303         }
4304         kref_put(&rdata->refcount, cifs_readdata_release);
4305 }
4306
4307 static int
4308 readpages_fill_pages(struct TCP_Server_Info *server,
4309                      struct cifs_readdata *rdata, struct iov_iter *iter,
4310                      unsigned int len)
4311 {
4312         int result = 0;
4313         unsigned int i;
4314         u64 eof;
4315         pgoff_t eof_index;
4316         unsigned int nr_pages = rdata->nr_pages;
4317         unsigned int page_offset = rdata->page_offset;
4318
4319         /* determine the eof that the server (probably) has */
4320         eof = CIFS_I(rdata->mapping->host)->server_eof;
4321         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4322         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4323
4324         rdata->got_bytes = 0;
4325         rdata->tailsz = PAGE_SIZE;
4326         for (i = 0; i < nr_pages; i++) {
4327                 struct page *page = rdata->pages[i];
4328                 unsigned int to_read = rdata->pagesz;
4329                 size_t n;
4330
4331                 if (i == 0)
4332                         to_read -= page_offset;
4333                 else
4334                         page_offset = 0;
4335
4336                 n = to_read;
4337
4338                 if (len >= to_read) {
4339                         len -= to_read;
4340                 } else if (len > 0) {
4341                         /* enough for partial page, fill and zero the rest */
4342                         zero_user(page, len + page_offset, to_read - len);
4343                         n = rdata->tailsz = len;
4344                         len = 0;
4345                 } else if (page->index > eof_index) {
4346                         /*
4347                          * The VFS will not try to do readahead past the
4348                          * i_size, but it's possible that we have outstanding
4349                          * writes with gaps in the middle and the i_size hasn't
4350                          * caught up yet. Populate those with zeroed out pages
4351                          * to prevent the VFS from repeatedly attempting to
4352                          * fill them until the writes are flushed.
4353                          */
4354                         zero_user(page, 0, PAGE_SIZE);
4355                         flush_dcache_page(page);
4356                         SetPageUptodate(page);
4357                         unlock_page(page);
4358                         put_page(page);
4359                         rdata->pages[i] = NULL;
4360                         rdata->nr_pages--;
4361                         continue;
4362                 } else {
4363                         /* no need to hold page hostage */
4364                         unlock_page(page);
4365                         put_page(page);
4366                         rdata->pages[i] = NULL;
4367                         rdata->nr_pages--;
4368                         continue;
4369                 }
4370
4371                 if (iter)
4372                         result = copy_page_from_iter(
4373                                         page, page_offset, n, iter);
4374 #ifdef CONFIG_CIFS_SMB_DIRECT
4375                 else if (rdata->mr)
4376                         result = n;
4377 #endif
4378                 else
4379                         result = cifs_read_page_from_socket(
4380                                         server, page, page_offset, n);
4381                 if (result < 0)
4382                         break;
4383
4384                 rdata->got_bytes += result;
4385         }
4386
4387         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4388                                                 rdata->got_bytes : result;
4389 }
4390
4391 static int
4392 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4393                                struct cifs_readdata *rdata, unsigned int len)
4394 {
4395         return readpages_fill_pages(server, rdata, NULL, len);
4396 }
4397
/*
 * copy_into_pages callback for readahead requests: fill the pages of
 * @rdata with everything that remains in @iter.
 */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iov_iter_count(iter));
}
4405
4406 static void cifs_readahead(struct readahead_control *ractl)
4407 {
4408         int rc;
4409         struct cifsFileInfo *open_file = ractl->file->private_data;
4410         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4411         struct TCP_Server_Info *server;
4412         pid_t pid;
4413         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4414         pgoff_t next_cached = ULONG_MAX;
4415         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4416                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4417         bool check_cache = caching;
4418
4419         xid = get_xid();
4420
4421         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4422                 pid = open_file->pid;
4423         else
4424                 pid = current->tgid;
4425
4426         rc = 0;
4427         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4428
4429         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4430                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4431
4432         /*
4433          * Chop the readahead request up into rsize-sized read requests.
4434          */
4435         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4436                 unsigned int i, got, rsize;
4437                 struct page *page;
4438                 struct cifs_readdata *rdata;
4439                 struct cifs_credits credits_on_stack;
4440                 struct cifs_credits *credits = &credits_on_stack;
4441                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4442
4443                 /*
4444                  * Find out if we have anything cached in the range of
4445                  * interest, and if so, where the next chunk of cached data is.
4446                  */
4447                 if (caching) {
4448                         if (check_cache) {
4449                                 rc = cifs_fscache_query_occupancy(
4450                                         ractl->mapping->host, index, nr_pages,
4451                                         &next_cached, &cache_nr_pages);
4452                                 if (rc < 0)
4453                                         caching = false;
4454                                 check_cache = false;
4455                         }
4456
4457                         if (index == next_cached) {
4458                                 /*
4459                                  * TODO: Send a whole batch of pages to be read
4460                                  * by the cache.
4461                                  */
4462                                 page = readahead_page(ractl);
4463                                 last_batch_size = 1 << thp_order(page);
4464                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4465                                                                page) < 0) {
4466                                         /*
4467                                          * TODO: Deal with cache read failure
4468                                          * here, but for the moment, delegate
4469                                          * that to readpage.
4470                                          */
4471                                         caching = false;
4472                                 }
4473                                 unlock_page(page);
4474                                 next_cached++;
4475                                 cache_nr_pages--;
4476                                 if (cache_nr_pages == 0)
4477                                         check_cache = true;
4478                                 continue;
4479                         }
4480                 }
4481
4482                 if (open_file->invalidHandle) {
4483                         rc = cifs_reopen_file(open_file, true);
4484                         if (rc) {
4485                                 if (rc == -EAGAIN)
4486                                         continue;
4487                                 break;
4488                         }
4489                 }
4490
4491                 if (cifs_sb->ctx->rsize == 0)
4492                         cifs_sb->ctx->rsize =
4493                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4494                                                              cifs_sb->ctx);
4495
4496                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4497                                                    &rsize, credits);
4498                 if (rc)
4499                         break;
4500                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4501                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4502
4503                 /*
4504                  * Give up immediately if rsize is too small to read an entire
4505                  * page. The VFS will fall back to readpage. We should never
4506                  * reach this point however since we set ra_pages to 0 when the
4507                  * rsize is smaller than a cache page.
4508                  */
4509                 if (unlikely(!nr_pages)) {
4510                         add_credits_and_wake_if(server, credits, 0);
4511                         break;
4512                 }
4513
4514                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4515                 if (!rdata) {
4516                         /* best to give up if we're out of mem */
4517                         add_credits_and_wake_if(server, credits, 0);
4518                         break;
4519                 }
4520
4521                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4522                 if (got != nr_pages) {
4523                         pr_warn("__readahead_batch() returned %u/%u\n",
4524                                 got, nr_pages);
4525                         nr_pages = got;
4526                 }
4527
4528                 rdata->nr_pages = nr_pages;
4529                 rdata->bytes    = readahead_batch_length(ractl);
4530                 rdata->cfile    = cifsFileInfo_get(open_file);
4531                 rdata->server   = server;
4532                 rdata->mapping  = ractl->mapping;
4533                 rdata->offset   = readahead_pos(ractl);
4534                 rdata->pid      = pid;
4535                 rdata->pagesz   = PAGE_SIZE;
4536                 rdata->tailsz   = PAGE_SIZE;
4537                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4538                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4539                 rdata->credits  = credits_on_stack;
4540
4541                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4542                 if (!rc) {
4543                         if (rdata->cfile->invalidHandle)
4544                                 rc = -EAGAIN;
4545                         else
4546                                 rc = server->ops->async_readv(rdata);
4547                 }
4548
4549                 if (rc) {
4550                         add_credits_and_wake_if(server, &rdata->credits, 0);
4551                         for (i = 0; i < rdata->nr_pages; i++) {
4552                                 page = rdata->pages[i];
4553                                 unlock_page(page);
4554                                 put_page(page);
4555                         }
4556                         /* Fallback to the readpage in error/reconnect cases */
4557                         kref_put(&rdata->refcount, cifs_readdata_release);
4558                         break;
4559                 }
4560
4561                 kref_put(&rdata->refcount, cifs_readdata_release);
4562                 last_batch_size = nr_pages;
4563         }
4564
4565         free_xid(xid);
4566 }
4567
4568 /*
4569  * cifs_readpage_worker must be called with the page pinned
4570  */
4571 static int cifs_readpage_worker(struct file *file, struct page *page,
4572         loff_t *poffset)
4573 {
4574         char *read_data;
4575         int rc;
4576
4577         /* Is the page cached? */
4578         rc = cifs_readpage_from_fscache(file_inode(file), page);
4579         if (rc == 0)
4580                 goto read_complete;
4581
4582         read_data = kmap(page);
4583         /* for reads over a certain size could initiate async read ahead */
4584
4585         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4586
4587         if (rc < 0)
4588                 goto io_error;
4589         else
4590                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4591
4592         /* we do not want atime to be less than mtime, it broke some apps */
4593         file_inode(file)->i_atime = current_time(file_inode(file));
4594         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4595                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4596         else
4597                 file_inode(file)->i_atime = current_time(file_inode(file));
4598
4599         if (PAGE_SIZE > rc)
4600                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4601
4602         flush_dcache_page(page);
4603         SetPageUptodate(page);
4604
4605         /* send this page to the cache */
4606         cifs_readpage_to_fscache(file_inode(file), page);
4607
4608         rc = 0;
4609
4610 io_error:
4611         kunmap(page);
4612         unlock_page(page);
4613
4614 read_complete:
4615         return rc;
4616 }
4617
4618 static int cifs_read_folio(struct file *file, struct folio *folio)
4619 {
4620         struct page *page = &folio->page;
4621         loff_t offset = page_file_offset(page);
4622         int rc = -EACCES;
4623         unsigned int xid;
4624
4625         xid = get_xid();
4626
4627         if (file->private_data == NULL) {
4628                 rc = -EBADF;
4629                 free_xid(xid);
4630                 return rc;
4631         }
4632
4633         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4634                  page, (int)offset, (int)offset);
4635
4636         rc = cifs_readpage_worker(file, page, &offset);
4637
4638         free_xid(xid);
4639         return rc;
4640 }
4641
4642 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4643 {
4644         struct cifsFileInfo *open_file;
4645
4646         spin_lock(&cifs_inode->open_file_lock);
4647         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4648                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4649                         spin_unlock(&cifs_inode->open_file_lock);
4650                         return 1;
4651                 }
4652         }
4653         spin_unlock(&cifs_inode->open_file_lock);
4654         return 0;
4655 }
4656
4657 /* We do not want to update the file size from server for inodes
4658    open for write - to avoid races with writepage extending
4659    the file - in the future we could consider allowing
4660    refreshing the inode only on increases in the file size
4661    but this is tricky to do without racing with writebehind
4662    page caching in the current Linux kernel design */
4663 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4664 {
4665         if (!cifsInode)
4666                 return true;
4667
4668         if (is_inode_writable(cifsInode)) {
4669                 /* This inode is open for write at least once */
4670                 struct cifs_sb_info *cifs_sb;
4671
4672                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4673                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4674                         /* since no page cache to corrupt on directio
4675                         we can change size safely */
4676                         return true;
4677                 }
4678
4679                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4680                         return true;
4681
4682                 return false;
4683         } else
4684                 return true;
4685 }
4686
4687 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4688                         loff_t pos, unsigned len,
4689                         struct page **pagep, void **fsdata)
4690 {
4691         int oncethru = 0;
4692         pgoff_t index = pos >> PAGE_SHIFT;
4693         loff_t offset = pos & (PAGE_SIZE - 1);
4694         loff_t page_start = pos & PAGE_MASK;
4695         loff_t i_size;
4696         struct page *page;
4697         int rc = 0;
4698
4699         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4700
4701 start:
4702         page = grab_cache_page_write_begin(mapping, index);
4703         if (!page) {
4704                 rc = -ENOMEM;
4705                 goto out;
4706         }
4707
4708         if (PageUptodate(page))
4709                 goto out;
4710
4711         /*
4712          * If we write a full page it will be up to date, no need to read from
4713          * the server. If the write is short, we'll end up doing a sync write
4714          * instead.
4715          */
4716         if (len == PAGE_SIZE)
4717                 goto out;
4718
4719         /*
4720          * optimize away the read when we have an oplock, and we're not
4721          * expecting to use any of the data we'd be reading in. That
4722          * is, when the page lies beyond the EOF, or straddles the EOF
4723          * and the write will cover all of the existing data.
4724          */
4725         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4726                 i_size = i_size_read(mapping->host);
4727                 if (page_start >= i_size ||
4728                     (offset == 0 && (pos + len) >= i_size)) {
4729                         zero_user_segments(page, 0, offset,
4730                                            offset + len,
4731                                            PAGE_SIZE);
4732                         /*
4733                          * PageChecked means that the parts of the page
4734                          * to which we're not writing are considered up
4735                          * to date. Once the data is copied to the
4736                          * page, it can be set uptodate.
4737                          */
4738                         SetPageChecked(page);
4739                         goto out;
4740                 }
4741         }
4742
4743         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4744                 /*
4745                  * might as well read a page, it is fast enough. If we get
4746                  * an error, we don't need to return it. cifs_write_end will
4747                  * do a sync write instead since PG_uptodate isn't set.
4748                  */
4749                 cifs_readpage_worker(file, page, &page_start);
4750                 put_page(page);
4751                 oncethru = 1;
4752                 goto start;
4753         } else {
4754                 /* we could try using another file handle if there is one -
4755                    but how would we lock it to prevent close of that handle
4756                    racing with this read? In any case
4757                    this will be written out by write_end so is fine */
4758         }
4759 out:
4760         *pagep = page;
4761         return rc;
4762 }
4763
4764 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4765 {
4766         if (folio_test_private(folio))
4767                 return 0;
4768         if (folio_test_fscache(folio)) {
4769                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4770                         return false;
4771                 folio_wait_fscache(folio);
4772         }
4773         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4774         return true;
4775 }
4776
4777 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4778                                  size_t length)
4779 {
4780         folio_wait_fscache(folio);
4781 }
4782
4783 static int cifs_launder_folio(struct folio *folio)
4784 {
4785         int rc = 0;
4786         loff_t range_start = folio_pos(folio);
4787         loff_t range_end = range_start + folio_size(folio);
4788         struct writeback_control wbc = {
4789                 .sync_mode = WB_SYNC_ALL,
4790                 .nr_to_write = 0,
4791                 .range_start = range_start,
4792                 .range_end = range_end,
4793         };
4794
4795         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4796
4797         if (folio_clear_dirty_for_io(folio))
4798                 rc = cifs_writepage_locked(&folio->page, &wbc);
4799
4800         folio_wait_fscache(folio);
4801         return rc;
4802 }
4803
/*
 * Work handler for an oplock break on an open file.
 *
 * @work is the oplock_break member embedded in a struct cifsFileInfo.  The
 * handler downgrades the cached oplock state, flushes (and possibly
 * invalidates) cached data for regular files, pushes byte-range locks when
 * write caching is lost, optionally sends the oplock response, and finally
 * drops a reference on the file info and signals that the break is done.
 */
void cifs_oplock_break(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                  oplock_break);
        struct inode *inode = d_inode(cfile->dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        int rc = 0;
        bool purge_cache = false;
        bool is_deferred = false;
        struct cifs_deferred_close *dclose;

        /* Block (uninterruptibly) on the pending-writers bit of the inode. */
        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
                        TASK_UNINTERRUPTIBLE);

        /*
         * Protocol-specific downgrade of the cached oplock level; it may ask
         * for the page cache to be purged via purge_cache.
         */
        server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
                                      cfile->oplock_epoch, &purge_cache);

        /* Read-only caching is dropped entirely when mandatory locks exist. */
        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
                         inode);
                cinode->oplock = 0;
        }

        /*
         * NOTE(review): cinode, which is derived from inode, has already been
         * dereferenced above, so the NULL test on inode here looks redundant
         * - confirm before relying on it.
         */
        if (inode && S_ISREG(inode->i_mode)) {
                if (CIFS_CACHE_READ(cinode))
                        break_lease(inode, O_RDONLY);
                else
                        break_lease(inode, O_WRONLY);
                /* Start writeback of dirty pages. */
                rc = filemap_fdatawrite(inode->i_mapping);
                if (!CIFS_CACHE_READ(cinode) || purge_cache) {
                        /*
                         * Read caching is gone (or a purge was requested):
                         * wait for the writeback, record any error on the
                         * mapping and invalidate it.  Note that this
                         * overwrites the rc from filemap_fdatawrite().
                         */
                        rc = filemap_fdatawait(inode->i_mapping);
                        mapping_set_error(inode->i_mapping, rc);
                        cifs_zap_mapping(inode);
                }
                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
                /* Write caching is still held: skip pushing locks. */
                if (CIFS_CACHE_WRITE(cinode))
                        goto oplock_break_ack;
        }

        rc = cifs_push_locks(cfile);
        if (rc)
                cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
        /*
         * When oplock break is received and there are no active
         * file handles but cached, then schedule deferred close immediately.
         * So, new open will not use cached handle.
         */
        spin_lock(&CIFS_I(inode)->deferred_lock);
        is_deferred = cifs_is_deferred_close(cfile, &dclose);
        spin_unlock(&CIFS_I(inode)->deferred_lock);
        if (is_deferred &&
            cfile->deferred_close_scheduled &&
            delayed_work_pending(&cfile->deferred)) {
                if (cancel_delayed_work(&cfile->deferred)) {
                        /*
                         * The pending deferred close was cancelled: drop a
                         * reference here (presumably the one that work item
                         * would have dropped - confirm) and skip the oplock
                         * response below.
                         */
                        _cifsFileInfo_put(cfile, false, false);
                        goto oplock_break_done;
                }
        }
        /*
         * releasing stale oplock after recent reconnect of smb session using
         * a now incorrect file handle is not a data integrity issue but do
         * not bother sending an oplock release if session to server still is
         * disconnected since oplock already released by the server
         */
        if (!cfile->oplock_break_cancelled) {
                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
                                                             cinode);
                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
        }
oplock_break_done:
        /*
         * NOTE(review): cinode is still used after this put - verify that
         * the inode cannot go away once the file info reference is dropped.
         */
        _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
        cifs_done_oplock_break(cinode);
}
4882
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with O_DIRECT flags, which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), direct read and write
 * requests are shunted off, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode, hence the
 * unconditional -EINVAL.
 */
static ssize_t cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
        /*
         * FIXME: direct IO will eventually need to be supported for mounts
         * that are not forcedirectio.
         */
        return -EINVAL;
}
4901
4902 static int cifs_swap_activate(struct swap_info_struct *sis,
4903                               struct file *swap_file, sector_t *span)
4904 {
4905         struct cifsFileInfo *cfile = swap_file->private_data;
4906         struct inode *inode = swap_file->f_mapping->host;
4907         unsigned long blocks;
4908         long long isize;
4909
4910         cifs_dbg(FYI, "swap activate\n");
4911
4912         if (!swap_file->f_mapping->a_ops->swap_rw)
4913                 /* Cannot support swap */
4914                 return -EINVAL;
4915
4916         spin_lock(&inode->i_lock);
4917         blocks = inode->i_blocks;
4918         isize = inode->i_size;
4919         spin_unlock(&inode->i_lock);
4920         if (blocks*512 < isize) {
4921                 pr_warn("swap activate: swapfile has holes\n");
4922                 return -EINVAL;
4923         }
4924         *span = sis->pages;
4925
4926         pr_warn_once("Swap support over SMB3 is experimental\n");
4927
4928         /*
4929          * TODO: consider adding ACL (or documenting how) to prevent other
4930          * users (on this or other systems) from reading it
4931          */
4932
4933
4934         /* TODO: add sk_set_memalloc(inet) or similar */
4935
4936         if (cfile)
4937                 cfile->swapfile = true;
4938         /*
4939          * TODO: Since file already open, we can't open with DENY_ALL here
4940          * but we could add call to grab a byte range lock to prevent others
4941          * from reading or writing the file
4942          */
4943
4944         sis->flags |= SWP_FS_OPS;
4945         return add_swap_extent(sis, 0, sis->max, 0);
4946 }
4947
4948 static void cifs_swap_deactivate(struct file *file)
4949 {
4950         struct cifsFileInfo *cfile = file->private_data;
4951
4952         cifs_dbg(FYI, "swap deactivate\n");
4953
4954         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4955
4956         if (cfile)
4957                 cfile->swapfile = false;
4958
4959         /* do we need to unpin (or unlock) the file */
4960 }
4961
/*
 * Mark a page as having been made dirty and thus needing writeback.
 *
 * Without CONFIG_CIFS_FSCACHE this is simply filemap_dirty_folio().  With
 * it, the dirtying goes through fscache_dirty_folio() together with the
 * inode's cache cookie, because the cache object to write back to also needs
 * to be pinned.
 */
#ifndef CONFIG_CIFS_FSCACHE
#define cifs_dirty_folio filemap_dirty_folio
#else
static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
{
        return fscache_dirty_folio(mapping, folio,
                                   cifs_inode_cookie(mapping->host));
}
#endif
4975
4976 const struct address_space_operations cifs_addr_ops = {
4977         .read_folio = cifs_read_folio,
4978         .readahead = cifs_readahead,
4979         .writepage = cifs_writepage,
4980         .writepages = cifs_writepages,
4981         .write_begin = cifs_write_begin,
4982         .write_end = cifs_write_end,
4983         .dirty_folio = cifs_dirty_folio,
4984         .release_folio = cifs_release_folio,
4985         .direct_IO = cifs_direct_io,
4986         .invalidate_folio = cifs_invalidate_folio,
4987         .launder_folio = cifs_launder_folio,
4988         /*
4989          * TODO: investigate and if useful we could add an cifs_migratePage
4990          * helper (under an CONFIG_MIGRATION) in the future, and also
4991          * investigate and add an is_dirty_writeback helper if needed
4992          */
4993         .swap_activate = cifs_swap_activate,
4994         .swap_deactivate = cifs_swap_deactivate,
4995 };
4996
4997 /*
4998  * cifs_readahead requires the server to support a buffer large enough to
4999  * contain the header plus one complete page of data.  Otherwise, we need
5000  * to leave cifs_readahead out of the address space operations.
5001  */
5002 const struct address_space_operations cifs_addr_ops_smallbuf = {
5003         .read_folio = cifs_read_folio,
5004         .writepage = cifs_writepage,
5005         .writepages = cifs_writepages,
5006         .write_begin = cifs_write_begin,
5007         .write_end = cifs_write_end,
5008         .dirty_folio = cifs_dirty_folio,
5009         .release_folio = cifs_release_folio,
5010         .invalidate_folio = cifs_invalidate_folio,
5011         .launder_folio = cifs_launder_folio,
5012 };