GNU Linux-libre 5.15.137-gnu
[releases.git] / fs / cifs / file.c
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/backing-dev.h>
13 #include <linux/stat.h>
14 #include <linux/fcntl.h>
15 #include <linux/pagemap.h>
16 #include <linux/pagevec.h>
17 #include <linux/writeback.h>
18 #include <linux/task_io_accounting_ops.h>
19 #include <linux/delay.h>
20 #include <linux/mount.h>
21 #include <linux/slab.h>
22 #include <linux/swap.h>
23 #include <linux/mm.h>
24 #include <asm/div64.h>
25 #include "cifsfs.h"
26 #include "cifspdu.h"
27 #include "cifsglob.h"
28 #include "cifsproto.h"
29 #include "cifs_unicode.h"
30 #include "cifs_debug.h"
31 #include "cifs_fs_sb.h"
32 #include "fscache.h"
33 #include "smbdirect.h"
34 #include "fs_context.h"
35 #include "cifs_ioctl.h"
36
37 static inline int cifs_convert_flags(unsigned int flags)
38 {
39         if ((flags & O_ACCMODE) == O_RDONLY)
40                 return GENERIC_READ;
41         else if ((flags & O_ACCMODE) == O_WRONLY)
42                 return GENERIC_WRITE;
43         else if ((flags & O_ACCMODE) == O_RDWR) {
44                 /* GENERIC_ALL is too much permission to request
45                    can cause unnecessary access denied on create */
46                 /* return GENERIC_ALL; */
47                 return (GENERIC_READ | GENERIC_WRITE);
48         }
49
50         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
51                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
52                 FILE_READ_DATA);
53 }
54
55 static u32 cifs_posix_convert_flags(unsigned int flags)
56 {
57         u32 posix_flags = 0;
58
59         if ((flags & O_ACCMODE) == O_RDONLY)
60                 posix_flags = SMB_O_RDONLY;
61         else if ((flags & O_ACCMODE) == O_WRONLY)
62                 posix_flags = SMB_O_WRONLY;
63         else if ((flags & O_ACCMODE) == O_RDWR)
64                 posix_flags = SMB_O_RDWR;
65
66         if (flags & O_CREAT) {
67                 posix_flags |= SMB_O_CREAT;
68                 if (flags & O_EXCL)
69                         posix_flags |= SMB_O_EXCL;
70         } else if (flags & O_EXCL)
71                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
72                          current->comm, current->tgid);
73
74         if (flags & O_TRUNC)
75                 posix_flags |= SMB_O_TRUNC;
76         /* be safe and imply O_SYNC for O_DSYNC */
77         if (flags & O_DSYNC)
78                 posix_flags |= SMB_O_SYNC;
79         if (flags & O_DIRECTORY)
80                 posix_flags |= SMB_O_DIRECTORY;
81         if (flags & O_NOFOLLOW)
82                 posix_flags |= SMB_O_NOFOLLOW;
83         if (flags & O_DIRECT)
84                 posix_flags |= SMB_O_DIRECT;
85
86         return posix_flags;
87 }
88
89 static inline int cifs_get_disposition(unsigned int flags)
90 {
91         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
92                 return FILE_CREATE;
93         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
94                 return FILE_OVERWRITE_IF;
95         else if ((flags & O_CREAT) == O_CREAT)
96                 return FILE_OPEN_IF;
97         else if ((flags & O_TRUNC) == O_TRUNC)
98                 return FILE_OVERWRITE;
99         else
100                 return FILE_OPEN;
101 }
102
103 int cifs_posix_open(const char *full_path, struct inode **pinode,
104                         struct super_block *sb, int mode, unsigned int f_flags,
105                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
106 {
107         int rc;
108         FILE_UNIX_BASIC_INFO *presp_data;
109         __u32 posix_flags = 0;
110         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
111         struct cifs_fattr fattr;
112         struct tcon_link *tlink;
113         struct cifs_tcon *tcon;
114
115         cifs_dbg(FYI, "posix open %s\n", full_path);
116
117         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
118         if (presp_data == NULL)
119                 return -ENOMEM;
120
121         tlink = cifs_sb_tlink(cifs_sb);
122         if (IS_ERR(tlink)) {
123                 rc = PTR_ERR(tlink);
124                 goto posix_open_ret;
125         }
126
127         tcon = tlink_tcon(tlink);
128         mode &= ~current_umask();
129
130         posix_flags = cifs_posix_convert_flags(f_flags);
131         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
132                              poplock, full_path, cifs_sb->local_nls,
133                              cifs_remap(cifs_sb));
134         cifs_put_tlink(tlink);
135
136         if (rc)
137                 goto posix_open_ret;
138
139         if (presp_data->Type == cpu_to_le32(-1))
140                 goto posix_open_ret; /* open ok, caller does qpathinfo */
141
142         if (!pinode)
143                 goto posix_open_ret; /* caller does not need info */
144
145         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
146
147         /* get new inode and set it up */
148         if (*pinode == NULL) {
149                 cifs_fill_uniqueid(sb, &fattr);
150                 *pinode = cifs_iget(sb, &fattr);
151                 if (!*pinode) {
152                         rc = -ENOMEM;
153                         goto posix_open_ret;
154                 }
155         } else {
156                 cifs_revalidate_mapping(*pinode);
157                 rc = cifs_fattr_to_inode(*pinode, &fattr);
158         }
159
160 posix_open_ret:
161         kfree(presp_data);
162         return rc;
163 }
164
165 static int
166 cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
167              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
168              struct cifs_fid *fid, unsigned int xid)
169 {
170         int rc;
171         int desired_access;
172         int disposition;
173         int create_options = CREATE_NOT_DIR;
174         FILE_ALL_INFO *buf;
175         struct TCP_Server_Info *server = tcon->ses->server;
176         struct cifs_open_parms oparms;
177
178         if (!server->ops->open)
179                 return -ENOSYS;
180
181         desired_access = cifs_convert_flags(f_flags);
182
183 /*********************************************************************
184  *  open flag mapping table:
185  *
186  *      POSIX Flag            CIFS Disposition
187  *      ----------            ----------------
188  *      O_CREAT               FILE_OPEN_IF
189  *      O_CREAT | O_EXCL      FILE_CREATE
190  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
191  *      O_TRUNC               FILE_OVERWRITE
192  *      none of the above     FILE_OPEN
193  *
194  *      Note that there is not a direct match between disposition
195  *      FILE_SUPERSEDE (ie create whether or not file exists although
196  *      O_CREAT | O_TRUNC is similar but truncates the existing
197  *      file rather than creating a new file as FILE_SUPERSEDE does
198  *      (which uses the attributes / metadata passed in on open call)
199  *?
200  *?  O_SYNC is a reasonable match to CIFS writethrough flag
201  *?  and the read write flags match reasonably.  O_LARGEFILE
202  *?  is irrelevant because largefile support is always used
203  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205  *********************************************************************/
206
207         disposition = cifs_get_disposition(f_flags);
208
209         /* BB pass O_SYNC flag through on file attributes .. BB */
210
211         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
212         if (!buf)
213                 return -ENOMEM;
214
215         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
216         if (f_flags & O_SYNC)
217                 create_options |= CREATE_WRITE_THROUGH;
218
219         if (f_flags & O_DIRECT)
220                 create_options |= CREATE_NO_BUFFER;
221
222         oparms.tcon = tcon;
223         oparms.cifs_sb = cifs_sb;
224         oparms.desired_access = desired_access;
225         oparms.create_options = cifs_create_options(cifs_sb, create_options);
226         oparms.disposition = disposition;
227         oparms.path = full_path;
228         oparms.fid = fid;
229         oparms.reconnect = false;
230
231         rc = server->ops->open(xid, &oparms, oplock, buf);
232
233         if (rc)
234                 goto out;
235
236         /* TODO: Add support for calling posix query info but with passing in fid */
237         if (tcon->unix_ext)
238                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
239                                               xid);
240         else
241                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
242                                          xid, fid);
243
244         if (rc) {
245                 server->ops->close(xid, tcon, fid);
246                 if (rc == -ESTALE)
247                         rc = -EOPENSTALE;
248         }
249
250 out:
251         kfree(buf);
252         return rc;
253 }
254
255 static bool
256 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
257 {
258         struct cifs_fid_locks *cur;
259         bool has_locks = false;
260
261         down_read(&cinode->lock_sem);
262         list_for_each_entry(cur, &cinode->llist, llist) {
263                 if (!list_empty(&cur->locks)) {
264                         has_locks = true;
265                         break;
266                 }
267         }
268         up_read(&cinode->lock_sem);
269         return has_locks;
270 }
271
/*
 * Acquire @sem for writing by retrying down_write_trylock(), sleeping
 * 10 ms between attempts, until it succeeds.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
278
279 static void cifsFileInfo_put_work(struct work_struct *work);
280
281 struct cifsFileInfo *
282 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
283                   struct tcon_link *tlink, __u32 oplock)
284 {
285         struct dentry *dentry = file_dentry(file);
286         struct inode *inode = d_inode(dentry);
287         struct cifsInodeInfo *cinode = CIFS_I(inode);
288         struct cifsFileInfo *cfile;
289         struct cifs_fid_locks *fdlocks;
290         struct cifs_tcon *tcon = tlink_tcon(tlink);
291         struct TCP_Server_Info *server = tcon->ses->server;
292
293         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
294         if (cfile == NULL)
295                 return cfile;
296
297         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
298         if (!fdlocks) {
299                 kfree(cfile);
300                 return NULL;
301         }
302
303         INIT_LIST_HEAD(&fdlocks->locks);
304         fdlocks->cfile = cfile;
305         cfile->llist = fdlocks;
306
307         cfile->count = 1;
308         cfile->pid = current->tgid;
309         cfile->uid = current_fsuid();
310         cfile->dentry = dget(dentry);
311         cfile->f_flags = file->f_flags;
312         cfile->invalidHandle = false;
313         cfile->deferred_close_scheduled = false;
314         cfile->tlink = cifs_get_tlink(tlink);
315         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
316         INIT_WORK(&cfile->put, cifsFileInfo_put_work);
317         INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
318         mutex_init(&cfile->fh_mutex);
319         spin_lock_init(&cfile->file_info_lock);
320
321         cifs_sb_active(inode->i_sb);
322
323         /*
324          * If the server returned a read oplock and we have mandatory brlocks,
325          * set oplock level to None.
326          */
327         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
328                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
329                 oplock = 0;
330         }
331
332         cifs_down_write(&cinode->lock_sem);
333         list_add(&fdlocks->llist, &cinode->llist);
334         up_write(&cinode->lock_sem);
335
336         spin_lock(&tcon->open_file_lock);
337         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
338                 oplock = fid->pending_open->oplock;
339         list_del(&fid->pending_open->olist);
340
341         fid->purge_cache = false;
342         server->ops->set_fid(cfile, fid, oplock);
343
344         list_add(&cfile->tlist, &tcon->openFileList);
345         atomic_inc(&tcon->num_local_opens);
346
347         /* if readable file instance put first in list*/
348         spin_lock(&cinode->open_file_lock);
349         if (file->f_mode & FMODE_READ)
350                 list_add(&cfile->flist, &cinode->openFileList);
351         else
352                 list_add_tail(&cfile->flist, &cinode->openFileList);
353         spin_unlock(&cinode->open_file_lock);
354         spin_unlock(&tcon->open_file_lock);
355
356         if (fid->purge_cache)
357                 cifs_zap_mapping(inode);
358
359         file->private_data = cfile;
360         return cfile;
361 }
362
363 struct cifsFileInfo *
364 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
365 {
366         spin_lock(&cifs_file->file_info_lock);
367         cifsFileInfo_get_locked(cifs_file);
368         spin_unlock(&cifs_file->file_info_lock);
369         return cifs_file;
370 }
371
372 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
373 {
374         struct inode *inode = d_inode(cifs_file->dentry);
375         struct cifsInodeInfo *cifsi = CIFS_I(inode);
376         struct cifsLockInfo *li, *tmp;
377         struct super_block *sb = inode->i_sb;
378
379         cifs_fscache_release_inode_cookie(inode);
380
381         /*
382          * Delete any outstanding lock records. We'll lose them when the file
383          * is closed anyway.
384          */
385         cifs_down_write(&cifsi->lock_sem);
386         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
387                 list_del(&li->llist);
388                 cifs_del_lock_waiters(li);
389                 kfree(li);
390         }
391         list_del(&cifs_file->llist->llist);
392         kfree(cifs_file->llist);
393         up_write(&cifsi->lock_sem);
394
395         cifs_put_tlink(cifs_file->tlink);
396         dput(cifs_file->dentry);
397         cifs_sb_deactive(sb);
398         kfree(cifs_file);
399 }
400
401 static void cifsFileInfo_put_work(struct work_struct *work)
402 {
403         struct cifsFileInfo *cifs_file = container_of(work,
404                         struct cifsFileInfo, put);
405
406         cifsFileInfo_put_final(cifs_file);
407 }
408
409 /**
410  * cifsFileInfo_put - release a reference of file priv data
411  *
412  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
413  *
414  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
415  */
416 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
417 {
418         _cifsFileInfo_put(cifs_file, true, true);
419 }
420
421 /**
422  * _cifsFileInfo_put - release a reference of file priv data
423  *
424  * This may involve closing the filehandle @cifs_file out on the
425  * server. Must be called without holding tcon->open_file_lock,
426  * cinode->open_file_lock and cifs_file->file_info_lock.
427  *
428  * If @wait_for_oplock_handler is true and we are releasing the last
429  * reference, wait for any running oplock break handler of the file
430  * and cancel any pending one.
431  *
432  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
433  * @wait_oplock_handler: must be false if called from oplock_break_handler
434  * @offload:    not offloaded on close and oplock breaks
435  *
436  */
437 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
438                        bool wait_oplock_handler, bool offload)
439 {
440         struct inode *inode = d_inode(cifs_file->dentry);
441         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
442         struct TCP_Server_Info *server = tcon->ses->server;
443         struct cifsInodeInfo *cifsi = CIFS_I(inode);
444         struct super_block *sb = inode->i_sb;
445         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
446         struct cifs_fid fid;
447         struct cifs_pending_open open;
448         bool oplock_break_cancelled;
449
450         spin_lock(&tcon->open_file_lock);
451         spin_lock(&cifsi->open_file_lock);
452         spin_lock(&cifs_file->file_info_lock);
453         if (--cifs_file->count > 0) {
454                 spin_unlock(&cifs_file->file_info_lock);
455                 spin_unlock(&cifsi->open_file_lock);
456                 spin_unlock(&tcon->open_file_lock);
457                 return;
458         }
459         spin_unlock(&cifs_file->file_info_lock);
460
461         if (server->ops->get_lease_key)
462                 server->ops->get_lease_key(inode, &fid);
463
464         /* store open in pending opens to make sure we don't miss lease break */
465         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
466
467         /* remove it from the lists */
468         list_del(&cifs_file->flist);
469         list_del(&cifs_file->tlist);
470         atomic_dec(&tcon->num_local_opens);
471
472         if (list_empty(&cifsi->openFileList)) {
473                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
474                          d_inode(cifs_file->dentry));
475                 /*
476                  * In strict cache mode we need invalidate mapping on the last
477                  * close  because it may cause a error when we open this file
478                  * again and get at least level II oplock.
479                  */
480                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
481                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
482                 cifs_set_oplock_level(cifsi, 0);
483         }
484
485         spin_unlock(&cifsi->open_file_lock);
486         spin_unlock(&tcon->open_file_lock);
487
488         oplock_break_cancelled = wait_oplock_handler ?
489                 cancel_work_sync(&cifs_file->oplock_break) : false;
490
491         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
492                 struct TCP_Server_Info *server = tcon->ses->server;
493                 unsigned int xid;
494
495                 xid = get_xid();
496                 if (server->ops->close_getattr)
497                         server->ops->close_getattr(xid, tcon, cifs_file);
498                 else if (server->ops->close)
499                         server->ops->close(xid, tcon, &cifs_file->fid);
500                 _free_xid(xid);
501         }
502
503         if (oplock_break_cancelled)
504                 cifs_done_oplock_break(cifsi);
505
506         cifs_del_pending_open(&open);
507
508         if (offload)
509                 queue_work(fileinfo_put_wq, &cifs_file->put);
510         else
511                 cifsFileInfo_put_final(cifs_file);
512 }
513
514 int cifs_open(struct inode *inode, struct file *file)
515
516 {
517         int rc = -EACCES;
518         unsigned int xid;
519         __u32 oplock;
520         struct cifs_sb_info *cifs_sb;
521         struct TCP_Server_Info *server;
522         struct cifs_tcon *tcon;
523         struct tcon_link *tlink;
524         struct cifsFileInfo *cfile = NULL;
525         void *page;
526         const char *full_path;
527         bool posix_open_ok = false;
528         struct cifs_fid fid;
529         struct cifs_pending_open open;
530
531         xid = get_xid();
532
533         cifs_sb = CIFS_SB(inode->i_sb);
534         if (unlikely(cifs_forced_shutdown(cifs_sb))) {
535                 free_xid(xid);
536                 return -EIO;
537         }
538
539         tlink = cifs_sb_tlink(cifs_sb);
540         if (IS_ERR(tlink)) {
541                 free_xid(xid);
542                 return PTR_ERR(tlink);
543         }
544         tcon = tlink_tcon(tlink);
545         server = tcon->ses->server;
546
547         page = alloc_dentry_path();
548         full_path = build_path_from_dentry(file_dentry(file), page);
549         if (IS_ERR(full_path)) {
550                 rc = PTR_ERR(full_path);
551                 goto out;
552         }
553
554         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
555                  inode, file->f_flags, full_path);
556
557         if (file->f_flags & O_DIRECT &&
558             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
559                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
560                         file->f_op = &cifs_file_direct_nobrl_ops;
561                 else
562                         file->f_op = &cifs_file_direct_ops;
563         }
564
565         /* Get the cached handle as SMB2 close is deferred */
566         rc = cifs_get_readable_path(tcon, full_path, &cfile);
567         if (rc == 0) {
568                 if (file->f_flags == cfile->f_flags) {
569                         file->private_data = cfile;
570                         spin_lock(&CIFS_I(inode)->deferred_lock);
571                         cifs_del_deferred_close(cfile);
572                         spin_unlock(&CIFS_I(inode)->deferred_lock);
573                         goto out;
574                 } else {
575                         _cifsFileInfo_put(cfile, true, false);
576                 }
577         }
578
579         if (server->oplocks)
580                 oplock = REQ_OPLOCK;
581         else
582                 oplock = 0;
583
584         if (!tcon->broken_posix_open && tcon->unix_ext &&
585             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
586                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
587                 /* can not refresh inode info since size could be stale */
588                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
589                                 cifs_sb->ctx->file_mode /* ignored */,
590                                 file->f_flags, &oplock, &fid.netfid, xid);
591                 if (rc == 0) {
592                         cifs_dbg(FYI, "posix open succeeded\n");
593                         posix_open_ok = true;
594                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
595                         if (tcon->ses->serverNOS)
596                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
597                                          tcon->ses->ip_addr,
598                                          tcon->ses->serverNOS);
599                         tcon->broken_posix_open = true;
600                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
601                          (rc != -EOPNOTSUPP)) /* path not found or net err */
602                         goto out;
603                 /*
604                  * Else fallthrough to retry open the old way on network i/o
605                  * or DFS errors.
606                  */
607         }
608
609         if (server->ops->get_lease_key)
610                 server->ops->get_lease_key(inode, &fid);
611
612         cifs_add_pending_open(&fid, tlink, &open);
613
614         if (!posix_open_ok) {
615                 if (server->ops->get_lease_key)
616                         server->ops->get_lease_key(inode, &fid);
617
618                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
619                                   file->f_flags, &oplock, &fid, xid);
620                 if (rc) {
621                         cifs_del_pending_open(&open);
622                         goto out;
623                 }
624         }
625
626         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
627         if (cfile == NULL) {
628                 if (server->ops->close)
629                         server->ops->close(xid, tcon, &fid);
630                 cifs_del_pending_open(&open);
631                 rc = -ENOMEM;
632                 goto out;
633         }
634
635         cifs_fscache_set_inode_cookie(inode, file);
636
637         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
638                 /*
639                  * Time to set mode which we can not set earlier due to
640                  * problems creating new read-only files.
641                  */
642                 struct cifs_unix_set_info_args args = {
643                         .mode   = inode->i_mode,
644                         .uid    = INVALID_UID, /* no change */
645                         .gid    = INVALID_GID, /* no change */
646                         .ctime  = NO_CHANGE_64,
647                         .atime  = NO_CHANGE_64,
648                         .mtime  = NO_CHANGE_64,
649                         .device = 0,
650                 };
651                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
652                                        cfile->pid);
653         }
654
655 out:
656         free_dentry_path(page);
657         free_xid(xid);
658         cifs_put_tlink(tlink);
659         return rc;
660 }
661
662 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
663
664 /*
665  * Try to reacquire byte range locks that were released when session
666  * to server was lost.
667  */
668 static int
669 cifs_relock_file(struct cifsFileInfo *cfile)
670 {
671         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
672         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
673         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
674         int rc = 0;
675
676         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
677         if (cinode->can_cache_brlcks) {
678                 /* can cache locks - no need to relock */
679                 up_read(&cinode->lock_sem);
680                 return rc;
681         }
682
683         if (cap_unix(tcon->ses) &&
684             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
685             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
686                 rc = cifs_push_posix_locks(cfile);
687         else
688                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
689
690         up_read(&cinode->lock_sem);
691         return rc;
692 }
693
694 static int
695 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
696 {
697         int rc = -EACCES;
698         unsigned int xid;
699         __u32 oplock;
700         struct cifs_sb_info *cifs_sb;
701         struct cifs_tcon *tcon;
702         struct TCP_Server_Info *server;
703         struct cifsInodeInfo *cinode;
704         struct inode *inode;
705         void *page;
706         const char *full_path;
707         int desired_access;
708         int disposition = FILE_OPEN;
709         int create_options = CREATE_NOT_DIR;
710         struct cifs_open_parms oparms;
711
712         xid = get_xid();
713         mutex_lock(&cfile->fh_mutex);
714         if (!cfile->invalidHandle) {
715                 mutex_unlock(&cfile->fh_mutex);
716                 free_xid(xid);
717                 return 0;
718         }
719
720         inode = d_inode(cfile->dentry);
721         cifs_sb = CIFS_SB(inode->i_sb);
722         tcon = tlink_tcon(cfile->tlink);
723         server = tcon->ses->server;
724
725         /*
726          * Can not grab rename sem here because various ops, including those
727          * that already have the rename sem can end up causing writepage to get
728          * called and if the server was down that means we end up here, and we
729          * can never tell if the caller already has the rename_sem.
730          */
731         page = alloc_dentry_path();
732         full_path = build_path_from_dentry(cfile->dentry, page);
733         if (IS_ERR(full_path)) {
734                 mutex_unlock(&cfile->fh_mutex);
735                 free_dentry_path(page);
736                 free_xid(xid);
737                 return PTR_ERR(full_path);
738         }
739
740         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
741                  inode, cfile->f_flags, full_path);
742
743         if (tcon->ses->server->oplocks)
744                 oplock = REQ_OPLOCK;
745         else
746                 oplock = 0;
747
748         if (tcon->unix_ext && cap_unix(tcon->ses) &&
749             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
750                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
751                 /*
752                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
753                  * original open. Must mask them off for a reopen.
754                  */
755                 unsigned int oflags = cfile->f_flags &
756                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
757
758                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
759                                      cifs_sb->ctx->file_mode /* ignored */,
760                                      oflags, &oplock, &cfile->fid.netfid, xid);
761                 if (rc == 0) {
762                         cifs_dbg(FYI, "posix reopen succeeded\n");
763                         oparms.reconnect = true;
764                         goto reopen_success;
765                 }
766                 /*
767                  * fallthrough to retry open the old way on errors, especially
768                  * in the reconnect path it is important to retry hard
769                  */
770         }
771
772         desired_access = cifs_convert_flags(cfile->f_flags);
773
774         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
775         if (cfile->f_flags & O_SYNC)
776                 create_options |= CREATE_WRITE_THROUGH;
777
778         if (cfile->f_flags & O_DIRECT)
779                 create_options |= CREATE_NO_BUFFER;
780
781         if (server->ops->get_lease_key)
782                 server->ops->get_lease_key(inode, &cfile->fid);
783
784         oparms.tcon = tcon;
785         oparms.cifs_sb = cifs_sb;
786         oparms.desired_access = desired_access;
787         oparms.create_options = cifs_create_options(cifs_sb, create_options);
788         oparms.disposition = disposition;
789         oparms.path = full_path;
790         oparms.fid = &cfile->fid;
791         oparms.reconnect = true;
792
793         /*
794          * Can not refresh inode by passing in file_info buf to be returned by
795          * ops->open and then calling get_inode_info with returned buf since
796          * file might have write behind data that needs to be flushed and server
797          * version of file size can be stale. If we knew for sure that inode was
798          * not dirty locally we could do this.
799          */
800         rc = server->ops->open(xid, &oparms, &oplock, NULL);
801         if (rc == -ENOENT && oparms.reconnect == false) {
802                 /* durable handle timeout is expired - open the file again */
803                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
804                 /* indicate that we need to relock the file */
805                 oparms.reconnect = true;
806         }
807
808         if (rc) {
809                 mutex_unlock(&cfile->fh_mutex);
810                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
811                 cifs_dbg(FYI, "oplock: %d\n", oplock);
812                 goto reopen_error_exit;
813         }
814
815 reopen_success:
816         cfile->invalidHandle = false;
817         mutex_unlock(&cfile->fh_mutex);
818         cinode = CIFS_I(inode);
819
820         if (can_flush) {
821                 rc = filemap_write_and_wait(inode->i_mapping);
822                 if (!is_interrupt_error(rc))
823                         mapping_set_error(inode->i_mapping, rc);
824
825                 if (tcon->posix_extensions)
826                         rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
827                 else if (tcon->unix_ext)
828                         rc = cifs_get_inode_info_unix(&inode, full_path,
829                                                       inode->i_sb, xid);
830                 else
831                         rc = cifs_get_inode_info(&inode, full_path, NULL,
832                                                  inode->i_sb, xid, NULL);
833         }
834         /*
835          * Else we are writing out data to server already and could deadlock if
836          * we tried to flush data, and since we do not know if we have data that
837          * would invalidate the current end of file on the server we can not go
838          * to the server to get the new inode info.
839          */
840
841         /*
842          * If the server returned a read oplock and we have mandatory brlocks,
843          * set oplock level to None.
844          */
845         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
846                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
847                 oplock = 0;
848         }
849
850         server->ops->set_fid(cfile, &cfile->fid, oplock);
851         if (oparms.reconnect)
852                 cifs_relock_file(cfile);
853
854 reopen_error_exit:
855         free_dentry_path(page);
856         free_xid(xid);
857         return rc;
858 }
859
860 void smb2_deferred_work_close(struct work_struct *work)
861 {
862         struct cifsFileInfo *cfile = container_of(work,
863                         struct cifsFileInfo, deferred.work);
864
865         spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
866         cifs_del_deferred_close(cfile);
867         cfile->deferred_close_scheduled = false;
868         spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
869         _cifsFileInfo_put(cfile, true, false);
870 }
871
/*
 * Close handler for a regular file.
 *
 * Detaches the cifsFileInfo from @file and either schedules a deferred
 * close on deferredclose_wq or puts the file immediately via
 * _cifsFileInfo_put().
 *
 * The close is deferred only when all of the following hold: a non-zero
 * closetimeo is configured, the inode's oplock equals CIFS_CACHE_RHW_FLG,
 * a lease was granted, CIFS_INO_CLOSE_ON_LOCK is not set, and the
 * bookkeeping structure could be allocated.
 *
 * Always returns 0.
 */
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                /*
                 * Allocated up front; a failed allocation is not an error, it
                 * just makes the condition below fall through to the
                 * immediate-put path.
                 */
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
                    && cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        /* attributes were modified: stamp ctime/mtime locally */
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                                cifs_fscache_update_inode_cookie(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        /* NOTE(review): dclose is presumably owned by the deferred list from here on */
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * A deferred close is already pending: push its
                                 * timer out. If there is no pending work,
                                 * mod_delayed_work queues new work, so increase
                                 * the ref count to avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /*
                                 * First deferred close for this file: queue the
                                 * work and return without putting cfile here.
                                 */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        /* not deferring: put the file now and discard the unused record */
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
921
922 void
923 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
924 {
925         struct cifsFileInfo *open_file;
926         struct list_head *tmp;
927         struct list_head *tmp1;
928         struct list_head tmp_list;
929
930         if (!tcon->use_persistent || !tcon->need_reopen_files)
931                 return;
932
933         tcon->need_reopen_files = false;
934
935         cifs_dbg(FYI, "Reopen persistent handles\n");
936         INIT_LIST_HEAD(&tmp_list);
937
938         /* list all files open on tree connection, reopen resilient handles  */
939         spin_lock(&tcon->open_file_lock);
940         list_for_each(tmp, &tcon->openFileList) {
941                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
942                 if (!open_file->invalidHandle)
943                         continue;
944                 cifsFileInfo_get(open_file);
945                 list_add_tail(&open_file->rlist, &tmp_list);
946         }
947         spin_unlock(&tcon->open_file_lock);
948
949         list_for_each_safe(tmp, tmp1, &tmp_list) {
950                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
951                 if (cifs_reopen_file(open_file, false /* do not flush */))
952                         tcon->need_reopen_files = true;
953                 list_del_init(&open_file->rlist);
954                 cifsFileInfo_put(open_file);
955         }
956 }
957
958 int cifs_closedir(struct inode *inode, struct file *file)
959 {
960         int rc = 0;
961         unsigned int xid;
962         struct cifsFileInfo *cfile = file->private_data;
963         struct cifs_tcon *tcon;
964         struct TCP_Server_Info *server;
965         char *buf;
966
967         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
968
969         if (cfile == NULL)
970                 return rc;
971
972         xid = get_xid();
973         tcon = tlink_tcon(cfile->tlink);
974         server = tcon->ses->server;
975
976         cifs_dbg(FYI, "Freeing private data in close dir\n");
977         spin_lock(&cfile->file_info_lock);
978         if (server->ops->dir_needs_close(cfile)) {
979                 cfile->invalidHandle = true;
980                 spin_unlock(&cfile->file_info_lock);
981                 if (server->ops->close_dir)
982                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
983                 else
984                         rc = -ENOSYS;
985                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
986                 /* not much we can do if it fails anyway, ignore rc */
987                 rc = 0;
988         } else
989                 spin_unlock(&cfile->file_info_lock);
990
991         buf = cfile->srch_inf.ntwrk_buf_start;
992         if (buf) {
993                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
994                 cfile->srch_inf.ntwrk_buf_start = NULL;
995                 if (cfile->srch_inf.smallBuf)
996                         cifs_small_buf_release(buf);
997                 else
998                         cifs_buf_release(buf);
999         }
1000
1001         cifs_put_tlink(cfile->tlink);
1002         kfree(file->private_data);
1003         file->private_data = NULL;
1004         /* BB can we lock the filestruct while this is going on? */
1005         free_xid(xid);
1006         return rc;
1007 }
1008
1009 static struct cifsLockInfo *
1010 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1011 {
1012         struct cifsLockInfo *lock =
1013                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1014         if (!lock)
1015                 return lock;
1016         lock->offset = offset;
1017         lock->length = length;
1018         lock->type = type;
1019         lock->pid = current->tgid;
1020         lock->flags = flags;
1021         INIT_LIST_HEAD(&lock->blist);
1022         init_waitqueue_head(&lock->block_q);
1023         return lock;
1024 }
1025
1026 void
1027 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1028 {
1029         struct cifsLockInfo *li, *tmp;
1030         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1031                 list_del_init(&li->blist);
1032                 wake_up(&li->block_q);
1033         }
1034 }
1035
/* Values for the rw_check argument of the lock-conflict helpers. */
#define CIFS_LOCK_OP    0       /* checking on behalf of a lock request */
#define CIFS_READ_OP    1       /* checking on behalf of a read */
#define CIFS_WRITE_OP   2       /* checking on behalf of a write */
1039
/*
 * Scan the byte-range locks recorded for one fid (@fdlocks) for a lock that
 * conflicts with the range [@offset, @offset + @length) being accessed
 * through @cfile.
 *
 * @rw_check : 0 - no op, 1 - read, 2 - write
 *
 * Returns true at the first conflicting lock and, if @conf_lock is not NULL,
 * stores that lock in *@conf_lock. Returns false when nothing conflicts.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* ranges do not overlap - no conflict */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                /*
                 * Read/write check against a lock held by this process
                 * through the same fid: only a shared lock combined with a
                 * write is a conflict.
                 */
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /*
                 * A shared request does not conflict with a lock of the same
                 * type, nor with a lock held by this process on the same fid.
                 */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                /* two FL_OFDLCK locks on the same fid do not conflict */
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
1076
1077 bool
1078 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1079                         __u8 type, __u16 flags,
1080                         struct cifsLockInfo **conf_lock, int rw_check)
1081 {
1082         bool rc = false;
1083         struct cifs_fid_locks *cur;
1084         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1085
1086         list_for_each_entry(cur, &cinode->llist, llist) {
1087                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1088                                                  flags, cfile, conf_lock,
1089                                                  rw_check);
1090                 if (rc)
1091                         break;
1092         }
1093
1094         return rc;
1095 }
1096
1097 /*
1098  * Check if there is another lock that prevents us to set the lock (mandatory
1099  * style). If such a lock exists, update the flock structure with its
1100  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1101  * or leave it the same if we can't. Returns 0 if we don't need to request to
1102  * the server or 1 otherwise.
1103  */
1104 static int
1105 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1106                __u8 type, struct file_lock *flock)
1107 {
1108         int rc = 0;
1109         struct cifsLockInfo *conf_lock;
1110         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1111         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1112         bool exist;
1113
1114         down_read(&cinode->lock_sem);
1115
1116         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1117                                         flock->fl_flags, &conf_lock,
1118                                         CIFS_LOCK_OP);
1119         if (exist) {
1120                 flock->fl_start = conf_lock->offset;
1121                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1122                 flock->fl_pid = conf_lock->pid;
1123                 if (conf_lock->type & server->vals->shared_lock_type)
1124                         flock->fl_type = F_RDLCK;
1125                 else
1126                         flock->fl_type = F_WRLCK;
1127         } else if (!cinode->can_cache_brlcks)
1128                 rc = 1;
1129         else
1130                 flock->fl_type = F_UNLCK;
1131
1132         up_read(&cinode->lock_sem);
1133         return rc;
1134 }
1135
1136 static void
1137 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1138 {
1139         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1140         cifs_down_write(&cinode->lock_sem);
1141         list_add_tail(&lock->llist, &cfile->llist->locks);
1142         up_write(&cinode->lock_sem);
1143 }
1144
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false;
 * 4) the non-zero result of wait_event_interruptible(), if wait is true and
 *    the wait for the conflicting lock ended abnormally.
 *
 * When wait is true and a conflicting lock exists, the request is queued on
 * that lock's blist and the caller sleeps until the request has been taken
 * off the list again; the whole check is then retried.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        /* no conflict and locks are cached locally: record it and finish */
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                /* queue behind the conflicting lock and sleep without lock_sem */
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                /* woken once our blist entry points back at itself, i.e. was unlinked */
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                /* wait failed: make sure we are off the waiters list before returning */
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
1192
1193 /*
1194  * Check if there is another lock that prevents us to set the lock (posix
1195  * style). If such a lock exists, update the flock structure with its
1196  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1197  * or leave it the same if we can't. Returns 0 if we don't need to request to
1198  * the server or 1 otherwise.
1199  */
1200 static int
1201 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1202 {
1203         int rc = 0;
1204         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1205         unsigned char saved_type = flock->fl_type;
1206
1207         if ((flock->fl_flags & FL_POSIX) == 0)
1208                 return 1;
1209
1210         down_read(&cinode->lock_sem);
1211         posix_test_lock(file, flock);
1212
1213         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1214                 flock->fl_type = saved_type;
1215                 rc = 1;
1216         }
1217
1218         up_read(&cinode->lock_sem);
1219         return rc;
1220 }
1221
1222 /*
1223  * Set the byte-range lock (posix style). Returns:
1224  * 1) <0, if the error occurs while setting the lock;
1225  * 2) 0, if we set the lock and don't need to request to the server;
1226  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1227  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1228  */
1229 static int
1230 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1231 {
1232         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1233         int rc = FILE_LOCK_DEFERRED + 1;
1234
1235         if ((flock->fl_flags & FL_POSIX) == 0)
1236                 return rc;
1237
1238         cifs_down_write(&cinode->lock_sem);
1239         if (!cinode->can_cache_brlcks) {
1240                 up_write(&cinode->lock_sem);
1241                 return rc;
1242         }
1243
1244         rc = posix_lock_file(file, flock, NULL);
1245         up_write(&cinode->lock_sem);
1246         return rc;
1247 }
1248
1249 int
1250 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1251 {
1252         unsigned int xid;
1253         int rc = 0, stored_rc;
1254         struct cifsLockInfo *li, *tmp;
1255         struct cifs_tcon *tcon;
1256         unsigned int num, max_num, max_buf;
1257         LOCKING_ANDX_RANGE *buf, *cur;
1258         static const int types[] = {
1259                 LOCKING_ANDX_LARGE_FILES,
1260                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1261         };
1262         int i;
1263
1264         xid = get_xid();
1265         tcon = tlink_tcon(cfile->tlink);
1266
1267         /*
1268          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1269          * and check it before using.
1270          */
1271         max_buf = tcon->ses->server->maxBuf;
1272         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1273                 free_xid(xid);
1274                 return -EINVAL;
1275         }
1276
1277         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1278                      PAGE_SIZE);
1279         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1280                         PAGE_SIZE);
1281         max_num = (max_buf - sizeof(struct smb_hdr)) /
1282                                                 sizeof(LOCKING_ANDX_RANGE);
1283         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1284         if (!buf) {
1285                 free_xid(xid);
1286                 return -ENOMEM;
1287         }
1288
1289         for (i = 0; i < 2; i++) {
1290                 cur = buf;
1291                 num = 0;
1292                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1293                         if (li->type != types[i])
1294                                 continue;
1295                         cur->Pid = cpu_to_le16(li->pid);
1296                         cur->LengthLow = cpu_to_le32((u32)li->length);
1297                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1298                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1299                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1300                         if (++num == max_num) {
1301                                 stored_rc = cifs_lockv(xid, tcon,
1302                                                        cfile->fid.netfid,
1303                                                        (__u8)li->type, 0, num,
1304                                                        buf);
1305                                 if (stored_rc)
1306                                         rc = stored_rc;
1307                                 cur = buf;
1308                                 num = 0;
1309                         } else
1310                                 cur++;
1311                 }
1312
1313                 if (num) {
1314                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1315                                                (__u8)types[i], 0, num, buf);
1316                         if (stored_rc)
1317                                 rc = stored_rc;
1318                 }
1319         }
1320
1321         kfree(buf);
1322         free_xid(xid);
1323         return rc;
1324 }
1325
1326 static __u32
1327 hash_lockowner(fl_owner_t owner)
1328 {
1329         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1330 }
1331
/*
 * Snapshot of one POSIX byte-range lock, queued for sending to the server.
 */
struct lock_to_push {
        struct list_head llist; /* links the entry into the list of locks to send */
        __u64 offset;           /* start of the locked range */
        __u64 length;           /* length of the locked range */
        __u32 pid;              /* lock owner id sent to the server */
        __u16 netfid;           /* network file id the lock belongs to */
        __u8 type;              /* lock type sent to the server */
};
1340
1341 static int
1342 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1343 {
1344         struct inode *inode = d_inode(cfile->dentry);
1345         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1346         struct file_lock *flock;
1347         struct file_lock_context *flctx = inode->i_flctx;
1348         unsigned int count = 0, i;
1349         int rc = 0, xid, type;
1350         struct list_head locks_to_send, *el;
1351         struct lock_to_push *lck, *tmp;
1352         __u64 length;
1353
1354         xid = get_xid();
1355
1356         if (!flctx)
1357                 goto out;
1358
1359         spin_lock(&flctx->flc_lock);
1360         list_for_each(el, &flctx->flc_posix) {
1361                 count++;
1362         }
1363         spin_unlock(&flctx->flc_lock);
1364
1365         INIT_LIST_HEAD(&locks_to_send);
1366
1367         /*
1368          * Allocating count locks is enough because no FL_POSIX locks can be
1369          * added to the list while we are holding cinode->lock_sem that
1370          * protects locking operations of this inode.
1371          */
1372         for (i = 0; i < count; i++) {
1373                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1374                 if (!lck) {
1375                         rc = -ENOMEM;
1376                         goto err_out;
1377                 }
1378                 list_add_tail(&lck->llist, &locks_to_send);
1379         }
1380
1381         el = locks_to_send.next;
1382         spin_lock(&flctx->flc_lock);
1383         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1384                 if (el == &locks_to_send) {
1385                         /*
1386                          * The list ended. We don't have enough allocated
1387                          * structures - something is really wrong.
1388                          */
1389                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1390                         break;
1391                 }
1392                 length = 1 + flock->fl_end - flock->fl_start;
1393                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1394                         type = CIFS_RDLCK;
1395                 else
1396                         type = CIFS_WRLCK;
1397                 lck = list_entry(el, struct lock_to_push, llist);
1398                 lck->pid = hash_lockowner(flock->fl_owner);
1399                 lck->netfid = cfile->fid.netfid;
1400                 lck->length = length;
1401                 lck->type = type;
1402                 lck->offset = flock->fl_start;
1403         }
1404         spin_unlock(&flctx->flc_lock);
1405
1406         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1407                 int stored_rc;
1408
1409                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1410                                              lck->offset, lck->length, NULL,
1411                                              lck->type, 0);
1412                 if (stored_rc)
1413                         rc = stored_rc;
1414                 list_del(&lck->llist);
1415                 kfree(lck);
1416         }
1417
1418 out:
1419         free_xid(xid);
1420         return rc;
1421 err_out:
1422         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1423                 list_del(&lck->llist);
1424                 kfree(lck);
1425         }
1426         goto out;
1427 }
1428
1429 static int
1430 cifs_push_locks(struct cifsFileInfo *cfile)
1431 {
1432         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1433         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1434         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1435         int rc = 0;
1436
1437         /* we are going to update can_cache_brlcks here - need a write access */
1438         cifs_down_write(&cinode->lock_sem);
1439         if (!cinode->can_cache_brlcks) {
1440                 up_write(&cinode->lock_sem);
1441                 return rc;
1442         }
1443
1444         if (cap_unix(tcon->ses) &&
1445             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1446             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1447                 rc = cifs_push_posix_locks(cfile);
1448         else
1449                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1450
1451         cinode->can_cache_brlcks = false;
1452         up_write(&cinode->lock_sem);
1453         return rc;
1454 }
1455
1456 static void
1457 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1458                 bool *wait_flag, struct TCP_Server_Info *server)
1459 {
1460         if (flock->fl_flags & FL_POSIX)
1461                 cifs_dbg(FYI, "Posix\n");
1462         if (flock->fl_flags & FL_FLOCK)
1463                 cifs_dbg(FYI, "Flock\n");
1464         if (flock->fl_flags & FL_SLEEP) {
1465                 cifs_dbg(FYI, "Blocking lock\n");
1466                 *wait_flag = true;
1467         }
1468         if (flock->fl_flags & FL_ACCESS)
1469                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1470         if (flock->fl_flags & FL_LEASE)
1471                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1472         if (flock->fl_flags &
1473             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1474                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1475                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1476
1477         *type = server->vals->large_lock_type;
1478         if (flock->fl_type == F_WRLCK) {
1479                 cifs_dbg(FYI, "F_WRLCK\n");
1480                 *type |= server->vals->exclusive_lock_type;
1481                 *lock = 1;
1482         } else if (flock->fl_type == F_UNLCK) {
1483                 cifs_dbg(FYI, "F_UNLCK\n");
1484                 *type |= server->vals->unlock_lock_type;
1485                 *unlock = 1;
1486                 /* Check if unlock includes more than one lock range */
1487         } else if (flock->fl_type == F_RDLCK) {
1488                 cifs_dbg(FYI, "F_RDLCK\n");
1489                 *type |= server->vals->shared_lock_type;
1490                 *lock = 1;
1491         } else if (flock->fl_type == F_EXLCK) {
1492                 cifs_dbg(FYI, "F_EXLCK\n");
1493                 *type |= server->vals->exclusive_lock_type;
1494                 *lock = 1;
1495         } else if (flock->fl_type == F_SHLCK) {
1496                 cifs_dbg(FYI, "F_SHLCK\n");
1497                 *type |= server->vals->shared_lock_type;
1498                 *lock = 1;
1499         } else
1500                 cifs_dbg(FYI, "Unknown type of lock\n");
1501 }
1502
/*
 * Test whether the lock described by @flock could be set, reporting the
 * answer back through @flock.
 *
 * With @posix_lck the local POSIX state is checked first and, if that is not
 * conclusive, the server is queried with CIFSSMBPosixLock().
 *
 * Otherwise the local mandatory-lock list is checked first. If that is not
 * conclusive the server is probed by trying to take the lock and releasing
 * it again straight away: a successful probe yields F_UNLCK; a failed probe
 * for a shared lock yields F_WRLCK; a failed probe for an exclusive lock is
 * followed by a shared probe, yielding F_RDLCK if that succeeds and F_WRLCK
 * if not.
 *
 * Returns the result of the POSIX path for POSIX locks; the mandatory path
 * always returns 0.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                /* 0 means the local state already answered the question */
                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        /* 0 means the local lock list already answered the question */
        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        /* probe: try to take the requested lock on the server */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                /* probe succeeded: release it again and report "no conflict" */
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        /* a shared probe failed: report a conflicting write lock */
        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        /* an exclusive probe failed: retry as shared to classify the conflict */
        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                /* shared probe succeeded: release it and report a read lock */
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
1571
1572 void
1573 cifs_move_llist(struct list_head *source, struct list_head *dest)
1574 {
1575         struct list_head *li, *tmp;
1576         list_for_each_safe(li, tmp, source)
1577                 list_move(li, dest);
1578 }
1579
1580 void
1581 cifs_free_llist(struct list_head *llist)
1582 {
1583         struct cifsLockInfo *li, *tmp;
1584         list_for_each_entry_safe(li, tmp, llist, llist) {
1585                 cifs_del_lock_waiters(li);
1586                 list_del(&li->llist);
1587                 kfree(li);
1588         }
1589 }
1590
/*
 * Release the byte-range locks recorded on @cfile that lie entirely inside
 * the range described by @flock and whose pid matches current->tgid.
 *
 * Two passes are made over cfile->llist->locks, one per entry of types[]
 * (first LOCKING_ANDX_LARGE_FILES alone, then combined with
 * LOCKING_ANDX_SHARED_LOCK). When cinode->can_cache_brlcks is set a matching
 * lock is simply dropped from the list. Otherwise it is packed into a
 * LOCKING_ANDX_RANGE array and passed to cifs_lockv() in batches of at most
 * max_num entries; locks of a failed batch are put back on the file's list.
 *
 * Returns 0 on success, -EINVAL if the server's maxBuf cannot hold a header
 * plus one range, -ENOMEM if the batch buffer cannot be allocated, or the
 * status of the most recent failing cifs_lockv() call.
 */
1591 int
1592 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1593                   unsigned int xid)
1594 {
1595         int rc = 0, stored_rc;
             /* lock types handled by pass 0 and pass 1 of the outer loop */
1596         static const int types[] = {
1597                 LOCKING_ANDX_LARGE_FILES,
1598                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1599         };
1600         unsigned int i;
1601         unsigned int max_num, num, max_buf;
1602         LOCKING_ANDX_RANGE *buf, *cur;
1603         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1604         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1605         struct cifsLockInfo *li, *tmp;
1606         __u64 length = 1 + flock->fl_end - flock->fl_start;
             /* locks of the batch currently being built, kept for rollback */
1607         struct list_head tmp_llist;
1608
1609         INIT_LIST_HEAD(&tmp_llist);
1610
1611         /*
1612          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1613          * and check it before using.
1614          */
1615         max_buf = tcon->ses->server->maxBuf;
1616         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1617                 return -EINVAL;
1618
1619         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1620                      PAGE_SIZE);
             /*
              * NOTE(review): sizeof(struct smb_hdr) is subtracted here and
              * again when max_num is computed. For a max_buf only slightly
              * above the minimum checked above, the second subtraction
              * looks like it could wrap or leave max_num == 0 - confirm
              * that maxBuf has a larger floor elsewhere.
              */
1621         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1622                         PAGE_SIZE);
1623         max_num = (max_buf - sizeof(struct smb_hdr)) /
1624                                                 sizeof(LOCKING_ANDX_RANGE);
1625         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1626         if (!buf)
1627                 return -ENOMEM;
1628
1629         cifs_down_write(&cinode->lock_sem);
1630         for (i = 0; i < 2; i++) {
                     /* start a fresh batch for this lock type */
1631                 cur = buf;
1632                 num = 0;
1633                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                             /* skip locks not fully contained in the request */
1634                         if (flock->fl_start > li->offset ||
1635                             (flock->fl_start + length) <
1636                             (li->offset + li->length))
1637                                 continue;
                             /* only touch locks recorded for this thread group */
1638                         if (current->tgid != li->pid)
1639                                 continue;
                             /* only the lock type of the current pass */
1640                         if (types[i] != li->type)
1641                                 continue;
1642                         if (cinode->can_cache_brlcks) {
1643                                 /*
1644                                  * We can cache brlock requests - simply remove
1645                                  * a lock from the file's list.
1646                                  */
1647                                 list_del(&li->llist);
1648                                 cifs_del_lock_waiters(li);
1649                                 kfree(li);
1650                                 continue;
1651                         }
                             /* encode this lock as the next range of the batch */
1652                         cur->Pid = cpu_to_le16(li->pid);
1653                         cur->LengthLow = cpu_to_le32((u32)li->length);
1654                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1655                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1656                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1657                         /*
1658                          * We need to save a lock here to let us add it again to
1659                          * the file's list if the unlock range request fails on
1660                          * the server.
1661                          */
1662                         list_move(&li->llist, &tmp_llist);
                             /* buffer full: send this batch and start a new one */
1663                         if (++num == max_num) {
                                     /* li->type equals types[i] here (filtered above) */
1664                                 stored_rc = cifs_lockv(xid, tcon,
1665                                                        cfile->fid.netfid,
1666                                                        li->type, num, 0, buf);
1667                                 if (stored_rc) {
1668                                         /*
1669                                          * We failed on the unlock range
1670                                          * request - add all locks from the tmp
1671                                          * list to the head of the file's list.
1672                                          */
1673                                         cifs_move_llist(&tmp_llist,
1674                                                         &cfile->llist->locks);
1675                                         rc = stored_rc;
1676                                 } else
1677                                         /*
1678                                          * The unlock range request succeed -
1679                                          * free the tmp list.
1680                                          */
1681                                         cifs_free_llist(&tmp_llist);
1682                                 cur = buf;
1683                                 num = 0;
1684                         } else
1685                                 cur++;
1686                 }
                     /* send whatever is left in a final, partially filled batch */
1687                 if (num) {
1688                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1689                                                types[i], num, 0, buf);
1690                         if (stored_rc) {
1691                                 cifs_move_llist(&tmp_llist,
1692                                                 &cfile->llist->locks);
1693                                 rc = stored_rc;
1694                         } else
1695                                 cifs_free_llist(&tmp_llist);
1696                 }
1697         }
1698
1699         up_write(&cinode->lock_sem);
1700         kfree(buf);
1701         return rc;
1702 }
1703
1704 static int
1705 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1706            bool wait_flag, bool posix_lck, int lock, int unlock,
1707            unsigned int xid)
1708 {
1709         int rc = 0;
1710         __u64 length = 1 + flock->fl_end - flock->fl_start;
1711         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1712         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1713         struct TCP_Server_Info *server = tcon->ses->server;
1714         struct inode *inode = d_inode(cfile->dentry);
1715
1716         if (posix_lck) {
1717                 int posix_lock_type;
1718
1719                 rc = cifs_posix_lock_set(file, flock);
1720                 if (rc <= FILE_LOCK_DEFERRED)
1721                         return rc;
1722
1723                 if (type & server->vals->shared_lock_type)
1724                         posix_lock_type = CIFS_RDLCK;
1725                 else
1726                         posix_lock_type = CIFS_WRLCK;
1727
1728                 if (unlock == 1)
1729                         posix_lock_type = CIFS_UNLCK;
1730
1731                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1732                                       hash_lockowner(flock->fl_owner),
1733                                       flock->fl_start, length,
1734                                       NULL, posix_lock_type, wait_flag);
1735                 goto out;
1736         }
1737
1738         if (lock) {
1739                 struct cifsLockInfo *lock;
1740
1741                 lock = cifs_lock_init(flock->fl_start, length, type,
1742                                       flock->fl_flags);
1743                 if (!lock)
1744                         return -ENOMEM;
1745
1746                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1747                 if (rc < 0) {
1748                         kfree(lock);
1749                         return rc;
1750                 }
1751                 if (!rc)
1752                         goto out;
1753
1754                 /*
1755                  * Windows 7 server can delay breaking lease from read to None
1756                  * if we set a byte-range lock on a file - break it explicitly
1757                  * before sending the lock to the server to be sure the next
1758                  * read won't conflict with non-overlapted locks due to
1759                  * pagereading.
1760                  */
1761                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1762                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1763                         cifs_zap_mapping(inode);
1764                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1765                                  inode);
1766                         CIFS_I(inode)->oplock = 0;
1767                 }
1768
1769                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1770                                             type, 1, 0, wait_flag);
1771                 if (rc) {
1772                         kfree(lock);
1773                         return rc;
1774                 }
1775
1776                 cifs_lock_add(cfile, lock);
1777         } else if (unlock)
1778                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1779
1780 out:
1781         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1782                 /*
1783                  * If this is a request to remove all locks because we
1784                  * are closing the file, it doesn't matter if the
1785                  * unlocking failed as both cifs.ko and the SMB server
1786                  * remove the lock on file close
1787                  */
1788                 if (rc) {
1789                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1790                         if (!(flock->fl_flags & FL_CLOSE))
1791                                 return rc;
1792                 }
1793                 rc = locks_lock_file_wait(file, flock);
1794         }
1795         return rc;
1796 }
1797
1798 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1799 {
1800         int rc, xid;
1801         int lock = 0, unlock = 0;
1802         bool wait_flag = false;
1803         bool posix_lck = false;
1804         struct cifs_sb_info *cifs_sb;
1805         struct cifs_tcon *tcon;
1806         struct cifsFileInfo *cfile;
1807         __u32 type;
1808
1809         xid = get_xid();
1810
1811         if (!(fl->fl_flags & FL_FLOCK)) {
1812                 rc = -ENOLCK;
1813                 free_xid(xid);
1814                 return rc;
1815         }
1816
1817         cfile = (struct cifsFileInfo *)file->private_data;
1818         tcon = tlink_tcon(cfile->tlink);
1819
1820         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1821                         tcon->ses->server);
1822         cifs_sb = CIFS_FILE_SB(file);
1823
1824         if (cap_unix(tcon->ses) &&
1825             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1826             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1827                 posix_lck = true;
1828
1829         if (!lock && !unlock) {
1830                 /*
1831                  * if no lock or unlock then nothing to do since we do not
1832                  * know what it is
1833                  */
1834                 rc = -EOPNOTSUPP;
1835                 free_xid(xid);
1836                 return rc;
1837         }
1838
1839         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1840                         xid);
1841         free_xid(xid);
1842         return rc;
1843
1844
1845 }
1846
1847 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1848 {
1849         int rc, xid;
1850         int lock = 0, unlock = 0;
1851         bool wait_flag = false;
1852         bool posix_lck = false;
1853         struct cifs_sb_info *cifs_sb;
1854         struct cifs_tcon *tcon;
1855         struct cifsFileInfo *cfile;
1856         __u32 type;
1857
1858         rc = -EACCES;
1859         xid = get_xid();
1860
1861         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1862                  cmd, flock->fl_flags, flock->fl_type,
1863                  flock->fl_start, flock->fl_end);
1864
1865         cfile = (struct cifsFileInfo *)file->private_data;
1866         tcon = tlink_tcon(cfile->tlink);
1867
1868         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1869                         tcon->ses->server);
1870         cifs_sb = CIFS_FILE_SB(file);
1871         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1872
1873         if (cap_unix(tcon->ses) &&
1874             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1875             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1876                 posix_lck = true;
1877         /*
1878          * BB add code here to normalize offset and length to account for
1879          * negative length which we can not accept over the wire.
1880          */
1881         if (IS_GETLK(cmd)) {
1882                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1883                 free_xid(xid);
1884                 return rc;
1885         }
1886
1887         if (!lock && !unlock) {
1888                 /*
1889                  * if no lock or unlock then nothing to do since we do not
1890                  * know what it is
1891                  */
1892                 free_xid(xid);
1893                 return -EOPNOTSUPP;
1894         }
1895
1896         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1897                         xid);
1898         free_xid(xid);
1899         return rc;
1900 }
1901
1902 /*
1903  * update the file size (if needed) after a write. Should be called with
1904  * the inode->i_lock held
1905  */
1906 void
1907 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1908                       unsigned int bytes_written)
1909 {
1910         loff_t end_of_write = offset + bytes_written;
1911
1912         if (end_of_write > cifsi->server_eof)
1913                 cifsi->server_eof = end_of_write;
1914 }
1915
1916 static ssize_t
1917 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1918            size_t write_size, loff_t *offset)
1919 {
1920         int rc = 0;
1921         unsigned int bytes_written = 0;
1922         unsigned int total_written;
1923         struct cifs_tcon *tcon;
1924         struct TCP_Server_Info *server;
1925         unsigned int xid;
1926         struct dentry *dentry = open_file->dentry;
1927         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1928         struct cifs_io_parms io_parms = {0};
1929
1930         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1931                  write_size, *offset, dentry);
1932
1933         tcon = tlink_tcon(open_file->tlink);
1934         server = tcon->ses->server;
1935
1936         if (!server->ops->sync_write)
1937                 return -ENOSYS;
1938
1939         xid = get_xid();
1940
1941         for (total_written = 0; write_size > total_written;
1942              total_written += bytes_written) {
1943                 rc = -EAGAIN;
1944                 while (rc == -EAGAIN) {
1945                         struct kvec iov[2];
1946                         unsigned int len;
1947
1948                         if (open_file->invalidHandle) {
1949                                 /* we could deadlock if we called
1950                                    filemap_fdatawait from here so tell
1951                                    reopen_file not to flush data to
1952                                    server now */
1953                                 rc = cifs_reopen_file(open_file, false);
1954                                 if (rc != 0)
1955                                         break;
1956                         }
1957
1958                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1959                                   (unsigned int)write_size - total_written);
1960                         /* iov[0] is reserved for smb header */
1961                         iov[1].iov_base = (char *)write_data + total_written;
1962                         iov[1].iov_len = len;
1963                         io_parms.pid = pid;
1964                         io_parms.tcon = tcon;
1965                         io_parms.offset = *offset;
1966                         io_parms.length = len;
1967                         rc = server->ops->sync_write(xid, &open_file->fid,
1968                                         &io_parms, &bytes_written, iov, 1);
1969                 }
1970                 if (rc || (bytes_written == 0)) {
1971                         if (total_written)
1972                                 break;
1973                         else {
1974                                 free_xid(xid);
1975                                 return rc;
1976                         }
1977                 } else {
1978                         spin_lock(&d_inode(dentry)->i_lock);
1979                         cifs_update_eof(cifsi, *offset, bytes_written);
1980                         spin_unlock(&d_inode(dentry)->i_lock);
1981                         *offset += bytes_written;
1982                 }
1983         }
1984
1985         cifs_stats_bytes_written(tcon, total_written);
1986
1987         if (total_written > 0) {
1988                 spin_lock(&d_inode(dentry)->i_lock);
1989                 if (*offset > d_inode(dentry)->i_size) {
1990                         i_size_write(d_inode(dentry), *offset);
1991                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1992                 }
1993                 spin_unlock(&d_inode(dentry)->i_lock);
1994         }
1995         mark_inode_dirty_sync(d_inode(dentry));
1996         free_xid(xid);
1997         return total_written;
1998 }
1999
2000 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2001                                         bool fsuid_only)
2002 {
2003         struct cifsFileInfo *open_file = NULL;
2004         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2005
2006         /* only filter by fsuid on multiuser mounts */
2007         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2008                 fsuid_only = false;
2009
2010         spin_lock(&cifs_inode->open_file_lock);
2011         /* we could simply get the first_list_entry since write-only entries
2012            are always at the end of the list but since the first entry might
2013            have a close pending, we go through the whole list */
2014         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2015                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2016                         continue;
2017                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2018                         if ((!open_file->invalidHandle)) {
2019                                 /* found a good file */
2020                                 /* lock it so it will not be closed on us */
2021                                 cifsFileInfo_get(open_file);
2022                                 spin_unlock(&cifs_inode->open_file_lock);
2023                                 return open_file;
2024                         } /* else might as well continue, and look for
2025                              another, or simply have the caller reopen it
2026                              again rather than trying to fix this handle */
2027                 } else /* write only file */
2028                         break; /* write only files are last so must be done */
2029         }
2030         spin_unlock(&cifs_inode->open_file_lock);
2031         return NULL;
2032 }
2033
/*
 * Find an open handle on @cifs_inode that was opened for writing.
 *
 * The open-file list is scanned twice: first accepting only handles whose
 * pid equals current->tgid, then handles of any pid. A valid handle is
 * returned at once, with a reference taken, through @ret_file. If only
 * invalid handles were seen, the first one recorded is reopened via
 * cifs_reopen_file(). When that fails the handle is moved to the tail of the
 * list and the whole search is repeated, giving up once refind exceeds
 * MAX_REOPEN_ATT.
 *
 * @flags: FIND_WR_FSUID_ONLY limits the search to handles owned by the
 *         current fsuid (honoured on multiuser mounts only);
 *         FIND_WR_WITH_DELETE requires DELETE in the handle's access mask.
 *
 * *@ret_file is NULL on any failure.
 */
2034 /* Return -EBADF if no handle is found and general rc otherwise */
2035 int
2036 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2037                        struct cifsFileInfo **ret_file)
2038 {
2039         struct cifsFileInfo *open_file, *inv_file = NULL;
2040         struct cifs_sb_info *cifs_sb;
2041         bool any_available = false;
2042         int rc = -EBADF;
2043         unsigned int refind = 0;
2044         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2045         bool with_delete = flags & FIND_WR_WITH_DELETE;
2046         *ret_file = NULL;
2047
2048         /*
2049          * Having a null inode here (because mapping->host was set to zero by
2050          * the VFS or MM) should not happen but we had reports of an oops (due
2051          * to it being zero) during stress testcases so we need to check for it
2052          */
2053
2054         if (cifs_inode == NULL) {
2055                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2056                 dump_stack();
2057                 return rc;
2058         }
2059
2060         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2061
2062         /* only filter by fsuid on multiuser mounts */
2063         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2064                 fsuid_only = false;
2065
2066         spin_lock(&cifs_inode->open_file_lock);
     /* entered with open_file_lock held, both initially and on every retry */
2067 refind_writable:
             /* too many failed reopen attempts: report the last error */
2068         if (refind > MAX_REOPEN_ATT) {
2069                 spin_unlock(&cifs_inode->open_file_lock);
2070                 return rc;
2071         }
2072         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                     /* first scan: only handles recorded for this thread group */
2073                 if (!any_available && open_file->pid != current->tgid)
2074                         continue;
2075                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2076                         continue;
2077                 if (with_delete && !(open_file->fid.access & DELETE))
2078                         continue;
2079                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2080                         if (!open_file->invalidHandle) {
2081                                 /* found a good writable file */
2082                                 cifsFileInfo_get(open_file);
2083                                 spin_unlock(&cifs_inode->open_file_lock);
2084                                 *ret_file = open_file;
2085                                 return 0;
2086                         } else {
                                     /* remember the first invalid handle as a fallback */
2087                                 if (!inv_file)
2088                                         inv_file = open_file;
2089                         }
2090                 }
2091         }
2092         /* couldn't find useable FH with same pid, try any available */
2093         if (!any_available) {
2094                 any_available = true;
2095                 goto refind_writable;
2096         }
2097
             /*
              * No valid handle in either scan. Pin the invalid candidate
              * before dropping the lock, and reset any_available so a
              * later retry starts again with the same-pid scan.
              */
2098         if (inv_file) {
2099                 any_available = false;
2100                 cifsFileInfo_get(inv_file);
2101         }
2102
2103         spin_unlock(&cifs_inode->open_file_lock);
2104
2105         if (inv_file) {
                     /* reopen without flushing (second argument false) */
2106                 rc = cifs_reopen_file(inv_file, false);
2107                 if (!rc) {
2108                         *ret_file = inv_file;
2109                         return 0;
2110                 }
2111
                     /*
                      * Reopen failed: push this handle to the end of the
                      * list, drop our reference and search again.
                      */
2112                 spin_lock(&cifs_inode->open_file_lock);
2113                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2114                 spin_unlock(&cifs_inode->open_file_lock);
2115                 cifsFileInfo_put(inv_file);
2116                 ++refind;
2117                 inv_file = NULL;
2118                 spin_lock(&cifs_inode->open_file_lock);
2119                 goto refind_writable;
2120         }
2121
2122         return rc;
2123 }
2124
2125 struct cifsFileInfo *
2126 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2127 {
2128         struct cifsFileInfo *cfile;
2129         int rc;
2130
2131         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2132         if (rc)
2133                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2134
2135         return cfile;
2136 }
2137
2138 int
2139 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2140                        int flags,
2141                        struct cifsFileInfo **ret_file)
2142 {
2143         struct cifsFileInfo *cfile;
2144         void *page = alloc_dentry_path();
2145
2146         *ret_file = NULL;
2147
2148         spin_lock(&tcon->open_file_lock);
2149         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2150                 struct cifsInodeInfo *cinode;
2151                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2152                 if (IS_ERR(full_path)) {
2153                         spin_unlock(&tcon->open_file_lock);
2154                         free_dentry_path(page);
2155                         return PTR_ERR(full_path);
2156                 }
2157                 if (strcmp(full_path, name))
2158                         continue;
2159
2160                 cinode = CIFS_I(d_inode(cfile->dentry));
2161                 spin_unlock(&tcon->open_file_lock);
2162                 free_dentry_path(page);
2163                 return cifs_get_writable_file(cinode, flags, ret_file);
2164         }
2165
2166         spin_unlock(&tcon->open_file_lock);
2167         free_dentry_path(page);
2168         return -ENOENT;
2169 }
2170
2171 int
2172 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2173                        struct cifsFileInfo **ret_file)
2174 {
2175         struct cifsFileInfo *cfile;
2176         void *page = alloc_dentry_path();
2177
2178         *ret_file = NULL;
2179
2180         spin_lock(&tcon->open_file_lock);
2181         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2182                 struct cifsInodeInfo *cinode;
2183                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2184                 if (IS_ERR(full_path)) {
2185                         spin_unlock(&tcon->open_file_lock);
2186                         free_dentry_path(page);
2187                         return PTR_ERR(full_path);
2188                 }
2189                 if (strcmp(full_path, name))
2190                         continue;
2191
2192                 cinode = CIFS_I(d_inode(cfile->dentry));
2193                 spin_unlock(&tcon->open_file_lock);
2194                 free_dentry_path(page);
2195                 *ret_file = find_readable_file(cinode, 0);
2196                 return *ret_file ? 0 : -ENOENT;
2197         }
2198
2199         spin_unlock(&tcon->open_file_lock);
2200         free_dentry_path(page);
2201         return -ENOENT;
2202 }
2203
2204 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2205 {
2206         struct address_space *mapping = page->mapping;
2207         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2208         char *write_data;
2209         int rc = -EFAULT;
2210         int bytes_written = 0;
2211         struct inode *inode;
2212         struct cifsFileInfo *open_file;
2213
2214         if (!mapping || !mapping->host)
2215                 return -EFAULT;
2216
2217         inode = page->mapping->host;
2218
2219         offset += (loff_t)from;
2220         write_data = kmap(page);
2221         write_data += from;
2222
2223         if ((to > PAGE_SIZE) || (from > to)) {
2224                 kunmap(page);
2225                 return -EIO;
2226         }
2227
2228         /* racing with truncate? */
2229         if (offset > mapping->host->i_size) {
2230                 kunmap(page);
2231                 return 0; /* don't care */
2232         }
2233
2234         /* check to make sure that we are not extending the file */
2235         if (mapping->host->i_size - offset < (loff_t)to)
2236                 to = (unsigned)(mapping->host->i_size - offset);
2237
2238         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2239                                     &open_file);
2240         if (!rc) {
2241                 bytes_written = cifs_write(open_file, open_file->pid,
2242                                            write_data, to - from, &offset);
2243                 cifsFileInfo_put(open_file);
2244                 /* Does mm or vfs already set times? */
2245                 inode->i_atime = inode->i_mtime = current_time(inode);
2246                 if ((bytes_written > 0) && (offset))
2247                         rc = 0;
2248                 else if (bytes_written < 0)
2249                         rc = bytes_written;
2250                 else
2251                         rc = -EFAULT;
2252         } else {
2253                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2254                 if (!is_retryable_error(rc))
2255                         rc = -EIO;
2256         }
2257
2258         kunmap(page);
2259         return rc;
2260 }
2261
2262 static struct cifs_writedata *
2263 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2264                           pgoff_t end, pgoff_t *index,
2265                           unsigned int *found_pages)
2266 {
2267         struct cifs_writedata *wdata;
2268
2269         wdata = cifs_writedata_alloc((unsigned int)tofind,
2270                                      cifs_writev_complete);
2271         if (!wdata)
2272                 return NULL;
2273
2274         *found_pages = find_get_pages_range_tag(mapping, index, end,
2275                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2276         return wdata;
2277 }
2278
/*
 * Lock and mark for writeback the leading run of usable pages among the
 * first @found_pages entries of wdata->pages.
 *
 * The scan stops at the first page that cannot be used, i.e. one that:
 *   - cannot be locked without blocking (only the first page is locked with
 *     a blocking lock_page()),
 *   - no longer belongs to @mapping,
 *   - lies beyond @end while wbc->range_cyclic is not set,
 *   - is not consecutive with the previously accepted page,
 *   - is still under writeback or is no longer dirty, or
 *   - starts at or beyond i_size.
 * Accepted pages are left locked with writeback set. The references on all
 * remaining pages are dropped and their array slots cleared.
 *
 * *@next is advanced to the index following the last accepted page. *@done
 * is set when the scan ran past @end or past i_size. If nothing was
 * accepted, *@index is moved just past the first page found.
 *
 * Returns the number of accepted pages.
 */
2279 static unsigned int
2280 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2281                     struct address_space *mapping,
2282                     struct writeback_control *wbc,
2283                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2284 {
2285         unsigned int nr_pages = 0, i;
2286         struct page *page;
2287
2288         for (i = 0; i < found_pages; i++) {
2289                 page = wdata->pages[i];
2290                 /*
2291                  * At this point we hold neither the i_pages lock nor the
2292                  * page lock: the page may be truncated or invalidated
2293                  * (changing page->mapping to NULL), or even swizzled
2294                  * back from swapper_space to tmpfs file mapping
2295                  */
2296
                     /* block for the first page, only try-lock the following ones */
2297                 if (nr_pages == 0)
2298                         lock_page(page);
2299                 else if (!trylock_page(page))
2300                         break;
2301
2302                 if (unlikely(page->mapping != mapping)) {
2303                         unlock_page(page);
2304                         break;
2305                 }
2306
2307                 if (!wbc->range_cyclic && page->index > end) {
2308                         *done = true;
2309                         unlock_page(page);
2310                         break;
2311                 }
2312
                     /* a *next of 0 disables the consecutiveness check */
2313                 if (*next && (page->index != *next)) {
2314                         /* Not next consecutive page */
2315                         unlock_page(page);
2316                         break;
2317                 }
2318
2319                 if (wbc->sync_mode != WB_SYNC_NONE)
2320                         wait_on_page_writeback(page);
2321
2322                 if (PageWriteback(page) ||
2323                                 !clear_page_dirty_for_io(page)) {
2324                         unlock_page(page);
2325                         break;
2326                 }
2327
2328                 /*
2329                  * This actually clears the dirty bit in the radix tree.
2330                  * See cifs_writepage() for more commentary.
2331                  */
2332                 set_page_writeback(page);
2333                 if (page_offset(page) >= i_size_read(mapping->host)) {
2334                         *done = true;
2335                         unlock_page(page);
2336                         end_page_writeback(page);
2337                         break;
2338                 }
2339
                     /* i equals nr_pages here, so the slot already holds this page */
2340                 wdata->pages[i] = page;
2341                 *next = page->index + 1;
2342                 ++nr_pages;
2343         }
2344
2345         /* reset index to refind any pages skipped */
             /*
              * NOTE(review): dereferences wdata->pages[0], so this assumes
              * found_pages > 0 - presumably guaranteed by the caller; confirm.
              */
2346         if (nr_pages == 0)
2347                 *index = wdata->pages[0]->index + 1;
2348
2349         /* put any pages we aren't going to use */
2350         for (i = nr_pages; i < found_pages; i++) {
2351                 put_page(wdata->pages[i]);
2352                 wdata->pages[i] = NULL;
2353         }
2354
2355         return nr_pages;
2356 }
2357
2358 static int
2359 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2360                  struct address_space *mapping, struct writeback_control *wbc)
2361 {
2362         int rc;
2363
2364         wdata->sync_mode = wbc->sync_mode;
2365         wdata->nr_pages = nr_pages;
2366         wdata->offset = page_offset(wdata->pages[0]);
2367         wdata->pagesz = PAGE_SIZE;
2368         wdata->tailsz = min(i_size_read(mapping->host) -
2369                         page_offset(wdata->pages[nr_pages - 1]),
2370                         (loff_t)PAGE_SIZE);
2371         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2372         wdata->pid = wdata->cfile->pid;
2373
2374         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2375         if (rc)
2376                 return rc;
2377
2378         if (wdata->cfile->invalidHandle)
2379                 rc = -EAGAIN;
2380         else
2381                 rc = wdata->server->ops->async_writev(wdata,
2382                                                       cifs_writedata_release);
2383
2384         return rc;
2385 }
2386
2387 static int cifs_writepages(struct address_space *mapping,
2388                            struct writeback_control *wbc)
2389 {
2390         struct inode *inode = mapping->host;
2391         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2392         struct TCP_Server_Info *server;
2393         bool done = false, scanned = false, range_whole = false;
2394         pgoff_t end, index;
2395         struct cifs_writedata *wdata;
2396         struct cifsFileInfo *cfile = NULL;
2397         int rc = 0;
2398         int saved_rc = 0;
2399         unsigned int xid;
2400
2401         /*
2402          * If wsize is smaller than the page cache size, default to writing
2403          * one page at a time via cifs_writepage
2404          */
2405         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2406                 return generic_writepages(mapping, wbc);
2407
2408         xid = get_xid();
2409         if (wbc->range_cyclic) {
2410                 index = mapping->writeback_index; /* Start from prev offset */
2411                 end = -1;
2412         } else {
2413                 index = wbc->range_start >> PAGE_SHIFT;
2414                 end = wbc->range_end >> PAGE_SHIFT;
2415                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2416                         range_whole = true;
2417                 scanned = true;
2418         }
2419         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2420
2421 retry:
2422         while (!done && index <= end) {
2423                 unsigned int i, nr_pages, found_pages, wsize;
2424                 pgoff_t next = 0, tofind, saved_index = index;
2425                 struct cifs_credits credits_on_stack;
2426                 struct cifs_credits *credits = &credits_on_stack;
2427                 int get_file_rc = 0;
2428
2429                 if (cfile)
2430                         cifsFileInfo_put(cfile);
2431
2432                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2433
2434                 /* in case of an error store it to return later */
2435                 if (rc)
2436                         get_file_rc = rc;
2437
2438                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2439                                                    &wsize, credits);
2440                 if (rc != 0) {
2441                         done = true;
2442                         break;
2443                 }
2444
2445                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2446
2447                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2448                                                   &found_pages);
2449                 if (!wdata) {
2450                         rc = -ENOMEM;
2451                         done = true;
2452                         add_credits_and_wake_if(server, credits, 0);
2453                         break;
2454                 }
2455
2456                 if (found_pages == 0) {
2457                         kref_put(&wdata->refcount, cifs_writedata_release);
2458                         add_credits_and_wake_if(server, credits, 0);
2459                         break;
2460                 }
2461
2462                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2463                                                end, &index, &next, &done);
2464
2465                 /* nothing to write? */
2466                 if (nr_pages == 0) {
2467                         kref_put(&wdata->refcount, cifs_writedata_release);
2468                         add_credits_and_wake_if(server, credits, 0);
2469                         continue;
2470                 }
2471
2472                 wdata->credits = credits_on_stack;
2473                 wdata->cfile = cfile;
2474                 wdata->server = server;
2475                 cfile = NULL;
2476
2477                 if (!wdata->cfile) {
2478                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2479                                  get_file_rc);
2480                         if (is_retryable_error(get_file_rc))
2481                                 rc = get_file_rc;
2482                         else
2483                                 rc = -EBADF;
2484                 } else
2485                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2486
2487                 for (i = 0; i < nr_pages; ++i)
2488                         unlock_page(wdata->pages[i]);
2489
2490                 /* send failure -- clean up the mess */
2491                 if (rc != 0) {
2492                         add_credits_and_wake_if(server, &wdata->credits, 0);
2493                         for (i = 0; i < nr_pages; ++i) {
2494                                 if (is_retryable_error(rc))
2495                                         redirty_page_for_writepage(wbc,
2496                                                            wdata->pages[i]);
2497                                 else
2498                                         SetPageError(wdata->pages[i]);
2499                                 end_page_writeback(wdata->pages[i]);
2500                                 put_page(wdata->pages[i]);
2501                         }
2502                         if (!is_retryable_error(rc))
2503                                 mapping_set_error(mapping, rc);
2504                 }
2505                 kref_put(&wdata->refcount, cifs_writedata_release);
2506
2507                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2508                         index = saved_index;
2509                         continue;
2510                 }
2511
2512                 /* Return immediately if we received a signal during writing */
2513                 if (is_interrupt_error(rc)) {
2514                         done = true;
2515                         break;
2516                 }
2517
2518                 if (rc != 0 && saved_rc == 0)
2519                         saved_rc = rc;
2520
2521                 wbc->nr_to_write -= nr_pages;
2522                 if (wbc->nr_to_write <= 0)
2523                         done = true;
2524
2525                 index = next;
2526         }
2527
2528         if (!scanned && !done) {
2529                 /*
2530                  * We hit the last page and there is more work to be done: wrap
2531                  * back to the start of the file
2532                  */
2533                 scanned = true;
2534                 index = 0;
2535                 goto retry;
2536         }
2537
2538         if (saved_rc != 0)
2539                 rc = saved_rc;
2540
2541         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2542                 mapping->writeback_index = index;
2543
2544         if (cfile)
2545                 cifsFileInfo_put(cfile);
2546         free_xid(xid);
2547         /* Indication to update ctime and mtime as close is deferred */
2548         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2549         return rc;
2550 }
2551
2552 static int
2553 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2554 {
2555         int rc;
2556         unsigned int xid;
2557
2558         xid = get_xid();
2559 /* BB add check for wbc flags */
2560         get_page(page);
2561         if (!PageUptodate(page))
2562                 cifs_dbg(FYI, "ppw - page not up to date\n");
2563
2564         /*
2565          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2566          *
2567          * A writepage() implementation always needs to do either this,
2568          * or re-dirty the page with "redirty_page_for_writepage()" in
2569          * the case of a failure.
2570          *
2571          * Just unlocking the page will cause the radix tree tag-bits
2572          * to fail to update with the state of the page correctly.
2573          */
2574         set_page_writeback(page);
2575 retry_write:
2576         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2577         if (is_retryable_error(rc)) {
2578                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2579                         goto retry_write;
2580                 redirty_page_for_writepage(wbc, page);
2581         } else if (rc != 0) {
2582                 SetPageError(page);
2583                 mapping_set_error(page->mapping, rc);
2584         } else {
2585                 SetPageUptodate(page);
2586         }
2587         end_page_writeback(page);
2588         put_page(page);
2589         free_xid(xid);
2590         return rc;
2591 }
2592
/*
 * Write @page with cifs_writepage_locked(), then unlock it.
 * Returns the result of cifs_writepage_locked().
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int ret;

        ret = cifs_writepage_locked(page, wbc);
        unlock_page(page);

        return ret;
}
2599
2600 static int cifs_write_end(struct file *file, struct address_space *mapping,
2601                         loff_t pos, unsigned len, unsigned copied,
2602                         struct page *page, void *fsdata)
2603 {
2604         int rc;
2605         struct inode *inode = mapping->host;
2606         struct cifsFileInfo *cfile = file->private_data;
2607         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2608         __u32 pid;
2609
2610         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2611                 pid = cfile->pid;
2612         else
2613                 pid = current->tgid;
2614
2615         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2616                  page, pos, copied);
2617
2618         if (PageChecked(page)) {
2619                 if (copied == len)
2620                         SetPageUptodate(page);
2621                 ClearPageChecked(page);
2622         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2623                 SetPageUptodate(page);
2624
2625         if (!PageUptodate(page)) {
2626                 char *page_data;
2627                 unsigned offset = pos & (PAGE_SIZE - 1);
2628                 unsigned int xid;
2629
2630                 xid = get_xid();
2631                 /* this is probably better than directly calling
2632                    partialpage_write since in this function the file handle is
2633                    known which we might as well leverage */
2634                 /* BB check if anything else missing out of ppw
2635                    such as updating last write time */
2636                 page_data = kmap(page);
2637                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2638                 /* if (rc < 0) should we set writebehind rc? */
2639                 kunmap(page);
2640
2641                 free_xid(xid);
2642         } else {
2643                 rc = copied;
2644                 pos += copied;
2645                 set_page_dirty(page);
2646         }
2647
2648         if (rc > 0) {
2649                 spin_lock(&inode->i_lock);
2650                 if (pos > inode->i_size) {
2651                         i_size_write(inode, pos);
2652                         inode->i_blocks = (512 - 1 + pos) >> 9;
2653                 }
2654                 spin_unlock(&inode->i_lock);
2655         }
2656
2657         unlock_page(page);
2658         put_page(page);
2659         /* Indication to update ctime and mtime as close is deferred */
2660         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2661
2662         return rc;
2663 }
2664
2665 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2666                       int datasync)
2667 {
2668         unsigned int xid;
2669         int rc = 0;
2670         struct cifs_tcon *tcon;
2671         struct TCP_Server_Info *server;
2672         struct cifsFileInfo *smbfile = file->private_data;
2673         struct inode *inode = file_inode(file);
2674         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2675
2676         rc = file_write_and_wait_range(file, start, end);
2677         if (rc) {
2678                 trace_cifs_fsync_err(inode->i_ino, rc);
2679                 return rc;
2680         }
2681
2682         xid = get_xid();
2683
2684         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2685                  file, datasync);
2686
2687         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2688                 rc = cifs_zap_mapping(inode);
2689                 if (rc) {
2690                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2691                         rc = 0; /* don't care about it in fsync */
2692                 }
2693         }
2694
2695         tcon = tlink_tcon(smbfile->tlink);
2696         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2697                 server = tcon->ses->server;
2698                 if (server->ops->flush == NULL) {
2699                         rc = -ENOSYS;
2700                         goto strict_fsync_exit;
2701                 }
2702
2703                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2704                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2705                         if (smbfile) {
2706                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2707                                 cifsFileInfo_put(smbfile);
2708                         } else
2709                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2710                 } else
2711                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2712         }
2713
2714 strict_fsync_exit:
2715         free_xid(xid);
2716         return rc;
2717 }
2718
2719 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2720 {
2721         unsigned int xid;
2722         int rc = 0;
2723         struct cifs_tcon *tcon;
2724         struct TCP_Server_Info *server;
2725         struct cifsFileInfo *smbfile = file->private_data;
2726         struct inode *inode = file_inode(file);
2727         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2728
2729         rc = file_write_and_wait_range(file, start, end);
2730         if (rc) {
2731                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2732                 return rc;
2733         }
2734
2735         xid = get_xid();
2736
2737         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2738                  file, datasync);
2739
2740         tcon = tlink_tcon(smbfile->tlink);
2741         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2742                 server = tcon->ses->server;
2743                 if (server->ops->flush == NULL) {
2744                         rc = -ENOSYS;
2745                         goto fsync_exit;
2746                 }
2747
2748                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2749                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2750                         if (smbfile) {
2751                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2752                                 cifsFileInfo_put(smbfile);
2753                         } else
2754                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2755                 } else
2756                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2757         }
2758
2759 fsync_exit:
2760         free_xid(xid);
2761         return rc;
2762 }
2763
2764 /*
2765  * As file closes, flush all cached write data for this inode checking
2766  * for write behind errors.
2767  */
2768 int cifs_flush(struct file *file, fl_owner_t id)
2769 {
2770         struct inode *inode = file_inode(file);
2771         int rc = 0;
2772
2773         if (file->f_mode & FMODE_WRITE)
2774                 rc = filemap_write_and_wait(inode->i_mapping);
2775
2776         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2777         if (rc)
2778                 trace_cifs_flush_err(inode->i_ino, rc);
2779         return rc;
2780 }
2781
2782 static int
2783 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2784 {
2785         int rc = 0;
2786         unsigned long i;
2787
2788         for (i = 0; i < num_pages; i++) {
2789                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2790                 if (!pages[i]) {
2791                         /*
2792                          * save number of pages we have already allocated and
2793                          * return with ENOMEM error
2794                          */
2795                         num_pages = i;
2796                         rc = -ENOMEM;
2797                         break;
2798                 }
2799         }
2800
2801         if (rc) {
2802                 for (i = 0; i < num_pages; i++)
2803                         put_page(pages[i]);
2804         }
2805         return rc;
2806 }
2807
2808 static inline
2809 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2810 {
2811         size_t num_pages;
2812         size_t clen;
2813
2814         clen = min_t(const size_t, len, wsize);
2815         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2816
2817         if (cur_len)
2818                 *cur_len = clen;
2819
2820         return num_pages;
2821 }
2822
2823 static void
2824 cifs_uncached_writedata_release(struct kref *refcount)
2825 {
2826         int i;
2827         struct cifs_writedata *wdata = container_of(refcount,
2828                                         struct cifs_writedata, refcount);
2829
2830         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2831         for (i = 0; i < wdata->nr_pages; i++)
2832                 put_page(wdata->pages[i]);
2833         cifs_writedata_release(refcount);
2834 }
2835
2836 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2837
2838 static void
2839 cifs_uncached_writev_complete(struct work_struct *work)
2840 {
2841         struct cifs_writedata *wdata = container_of(work,
2842                                         struct cifs_writedata, work);
2843         struct inode *inode = d_inode(wdata->cfile->dentry);
2844         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2845
2846         spin_lock(&inode->i_lock);
2847         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2848         if (cifsi->server_eof > inode->i_size)
2849                 i_size_write(inode, cifsi->server_eof);
2850         spin_unlock(&inode->i_lock);
2851
2852         complete(&wdata->done);
2853         collect_uncached_write_data(wdata->ctx);
2854         /* the below call can possibly free the last ref to aio ctx */
2855         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2856 }
2857
2858 static int
2859 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2860                       size_t *len, unsigned long *num_pages)
2861 {
2862         size_t save_len, copied, bytes, cur_len = *len;
2863         unsigned long i, nr_pages = *num_pages;
2864
2865         save_len = cur_len;
2866         for (i = 0; i < nr_pages; i++) {
2867                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2868                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2869                 cur_len -= copied;
2870                 /*
2871                  * If we didn't copy as much as we expected, then that
2872                  * may mean we trod into an unmapped area. Stop copying
2873                  * at that point. On the next pass through the big
2874                  * loop, we'll likely end up getting a zero-length
2875                  * write and bailing out of it.
2876                  */
2877                 if (copied < bytes)
2878                         break;
2879         }
2880         cur_len = save_len - cur_len;
2881         *len = cur_len;
2882
2883         /*
2884          * If we have no data to send, then that probably means that
2885          * the copy above failed altogether. That's most likely because
2886          * the address in the iovec was bogus. Return -EFAULT and let
2887          * the caller free anything we allocated and bail out.
2888          */
2889         if (!cur_len)
2890                 return -EFAULT;
2891
2892         /*
2893          * i + 1 now represents the number of pages we actually used in
2894          * the copy phase above.
2895          */
2896         *num_pages = i + 1;
2897         return 0;
2898 }
2899
2900 static int
2901 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2902         struct cifs_aio_ctx *ctx)
2903 {
2904         unsigned int wsize;
2905         struct cifs_credits credits;
2906         int rc;
2907         struct TCP_Server_Info *server = wdata->server;
2908
2909         do {
2910                 if (wdata->cfile->invalidHandle) {
2911                         rc = cifs_reopen_file(wdata->cfile, false);
2912                         if (rc == -EAGAIN)
2913                                 continue;
2914                         else if (rc)
2915                                 break;
2916                 }
2917
2918
2919                 /*
2920                  * Wait for credits to resend this wdata.
2921                  * Note: we are attempting to resend the whole wdata not in
2922                  * segments
2923                  */
2924                 do {
2925                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2926                                                 &wsize, &credits);
2927                         if (rc)
2928                                 goto fail;
2929
2930                         if (wsize < wdata->bytes) {
2931                                 add_credits_and_wake_if(server, &credits, 0);
2932                                 msleep(1000);
2933                         }
2934                 } while (wsize < wdata->bytes);
2935                 wdata->credits = credits;
2936
2937                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2938
2939                 if (!rc) {
2940                         if (wdata->cfile->invalidHandle)
2941                                 rc = -EAGAIN;
2942                         else {
2943 #ifdef CONFIG_CIFS_SMB_DIRECT
2944                                 if (wdata->mr) {
2945                                         wdata->mr->need_invalidate = true;
2946                                         smbd_deregister_mr(wdata->mr);
2947                                         wdata->mr = NULL;
2948                                 }
2949 #endif
2950                                 rc = server->ops->async_writev(wdata,
2951                                         cifs_uncached_writedata_release);
2952                         }
2953                 }
2954
2955                 /* If the write was successfully sent, we are done */
2956                 if (!rc) {
2957                         list_add_tail(&wdata->list, wdata_list);
2958                         return 0;
2959                 }
2960
2961                 /* Roll back credits and retry if needed */
2962                 add_credits_and_wake_if(server, &wdata->credits, 0);
2963         } while (rc == -EAGAIN);
2964
2965 fail:
2966         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2967         return rc;
2968 }
2969
2970 static int
2971 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2972                      struct cifsFileInfo *open_file,
2973                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2974                      struct cifs_aio_ctx *ctx)
2975 {
2976         int rc = 0;
2977         size_t cur_len;
2978         unsigned long nr_pages, num_pages, i;
2979         struct cifs_writedata *wdata;
2980         struct iov_iter saved_from = *from;
2981         loff_t saved_offset = offset;
2982         pid_t pid;
2983         struct TCP_Server_Info *server;
2984         struct page **pagevec;
2985         size_t start;
2986         unsigned int xid;
2987
2988         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2989                 pid = open_file->pid;
2990         else
2991                 pid = current->tgid;
2992
2993         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2994         xid = get_xid();
2995
2996         do {
2997                 unsigned int wsize;
2998                 struct cifs_credits credits_on_stack;
2999                 struct cifs_credits *credits = &credits_on_stack;
3000
3001                 if (open_file->invalidHandle) {
3002                         rc = cifs_reopen_file(open_file, false);
3003                         if (rc == -EAGAIN)
3004                                 continue;
3005                         else if (rc)
3006                                 break;
3007                 }
3008
3009                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3010                                                    &wsize, credits);
3011                 if (rc)
3012                         break;
3013
3014                 cur_len = min_t(const size_t, len, wsize);
3015
3016                 if (ctx->direct_io) {
3017                         ssize_t result;
3018
3019                         result = iov_iter_get_pages_alloc(
3020                                 from, &pagevec, cur_len, &start);
3021                         if (result < 0) {
3022                                 cifs_dbg(VFS,
3023                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3024                                          result, iov_iter_type(from),
3025                                          from->iov_offset, from->count);
3026                                 dump_stack();
3027
3028                                 rc = result;
3029                                 add_credits_and_wake_if(server, credits, 0);
3030                                 break;
3031                         }
3032                         cur_len = (size_t)result;
3033                         iov_iter_advance(from, cur_len);
3034
3035                         nr_pages =
3036                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3037
3038                         wdata = cifs_writedata_direct_alloc(pagevec,
3039                                              cifs_uncached_writev_complete);
3040                         if (!wdata) {
3041                                 rc = -ENOMEM;
3042                                 add_credits_and_wake_if(server, credits, 0);
3043                                 break;
3044                         }
3045
3046
3047                         wdata->page_offset = start;
3048                         wdata->tailsz =
3049                                 nr_pages > 1 ?
3050                                         cur_len - (PAGE_SIZE - start) -
3051                                         (nr_pages - 2) * PAGE_SIZE :
3052                                         cur_len;
3053                 } else {
3054                         nr_pages = get_numpages(wsize, len, &cur_len);
3055                         wdata = cifs_writedata_alloc(nr_pages,
3056                                              cifs_uncached_writev_complete);
3057                         if (!wdata) {
3058                                 rc = -ENOMEM;
3059                                 add_credits_and_wake_if(server, credits, 0);
3060                                 break;
3061                         }
3062
3063                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3064                         if (rc) {
3065                                 kvfree(wdata->pages);
3066                                 kfree(wdata);
3067                                 add_credits_and_wake_if(server, credits, 0);
3068                                 break;
3069                         }
3070
3071                         num_pages = nr_pages;
3072                         rc = wdata_fill_from_iovec(
3073                                 wdata, from, &cur_len, &num_pages);
3074                         if (rc) {
3075                                 for (i = 0; i < nr_pages; i++)
3076                                         put_page(wdata->pages[i]);
3077                                 kvfree(wdata->pages);
3078                                 kfree(wdata);
3079                                 add_credits_and_wake_if(server, credits, 0);
3080                                 break;
3081                         }
3082
3083                         /*
3084                          * Bring nr_pages down to the number of pages we
3085                          * actually used, and free any pages that we didn't use.
3086                          */
3087                         for ( ; nr_pages > num_pages; nr_pages--)
3088                                 put_page(wdata->pages[nr_pages - 1]);
3089
3090                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3091                 }
3092
3093                 wdata->sync_mode = WB_SYNC_ALL;
3094                 wdata->nr_pages = nr_pages;
3095                 wdata->offset = (__u64)offset;
3096                 wdata->cfile = cifsFileInfo_get(open_file);
3097                 wdata->server = server;
3098                 wdata->pid = pid;
3099                 wdata->bytes = cur_len;
3100                 wdata->pagesz = PAGE_SIZE;
3101                 wdata->credits = credits_on_stack;
3102                 wdata->ctx = ctx;
3103                 kref_get(&ctx->refcount);
3104
3105                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3106
3107                 if (!rc) {
3108                         if (wdata->cfile->invalidHandle)
3109                                 rc = -EAGAIN;
3110                         else
3111                                 rc = server->ops->async_writev(wdata,
3112                                         cifs_uncached_writedata_release);
3113                 }
3114
3115                 if (rc) {
3116                         add_credits_and_wake_if(server, &wdata->credits, 0);
3117                         kref_put(&wdata->refcount,
3118                                  cifs_uncached_writedata_release);
3119                         if (rc == -EAGAIN) {
3120                                 *from = saved_from;
3121                                 iov_iter_advance(from, offset - saved_offset);
3122                                 continue;
3123                         }
3124                         break;
3125                 }
3126
3127                 list_add_tail(&wdata->list, wdata_list);
3128                 offset += cur_len;
3129                 len -= cur_len;
3130         } while (len > 0);
3131
3132         free_xid(xid);
3133         return rc;
3134 }
3135
3136 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3137 {
3138         struct cifs_writedata *wdata, *tmp;
3139         struct cifs_tcon *tcon;
3140         struct cifs_sb_info *cifs_sb;
3141         struct dentry *dentry = ctx->cfile->dentry;
3142         ssize_t rc;
3143
3144         tcon = tlink_tcon(ctx->cfile->tlink);
3145         cifs_sb = CIFS_SB(dentry->d_sb);
3146
3147         mutex_lock(&ctx->aio_mutex);
3148
3149         if (list_empty(&ctx->list)) {
3150                 mutex_unlock(&ctx->aio_mutex);
3151                 return;
3152         }
3153
3154         rc = ctx->rc;
3155         /*
3156          * Wait for and collect replies for any successful sends in order of
3157          * increasing offset. Once an error is hit, then return without waiting
3158          * for any more replies.
3159          */
3160 restart_loop:
3161         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3162                 if (!rc) {
3163                         if (!try_wait_for_completion(&wdata->done)) {
3164                                 mutex_unlock(&ctx->aio_mutex);
3165                                 return;
3166                         }
3167
3168                         if (wdata->result)
3169                                 rc = wdata->result;
3170                         else
3171                                 ctx->total_len += wdata->bytes;
3172
3173                         /* resend call if it's a retryable error */
3174                         if (rc == -EAGAIN) {
3175                                 struct list_head tmp_list;
3176                                 struct iov_iter tmp_from = ctx->iter;
3177
3178                                 INIT_LIST_HEAD(&tmp_list);
3179                                 list_del_init(&wdata->list);
3180
3181                                 if (ctx->direct_io)
3182                                         rc = cifs_resend_wdata(
3183                                                 wdata, &tmp_list, ctx);
3184                                 else {
3185                                         iov_iter_advance(&tmp_from,
3186                                                  wdata->offset - ctx->pos);
3187
3188                                         rc = cifs_write_from_iter(wdata->offset,
3189                                                 wdata->bytes, &tmp_from,
3190                                                 ctx->cfile, cifs_sb, &tmp_list,
3191                                                 ctx);
3192
3193                                         kref_put(&wdata->refcount,
3194                                                 cifs_uncached_writedata_release);
3195                                 }
3196
3197                                 list_splice(&tmp_list, &ctx->list);
3198                                 goto restart_loop;
3199                         }
3200                 }
3201                 list_del_init(&wdata->list);
3202                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3203         }
3204
3205         cifs_stats_bytes_written(tcon, ctx->total_len);
3206         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3207
3208         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3209
3210         mutex_unlock(&ctx->aio_mutex);
3211
3212         if (ctx->iocb && ctx->iocb->ki_complete)
3213                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3214         else
3215                 complete(&ctx->done);
3216 }
3217
3218 static ssize_t __cifs_writev(
3219         struct kiocb *iocb, struct iov_iter *from, bool direct)
3220 {
3221         struct file *file = iocb->ki_filp;
3222         ssize_t total_written = 0;
3223         struct cifsFileInfo *cfile;
3224         struct cifs_tcon *tcon;
3225         struct cifs_sb_info *cifs_sb;
3226         struct cifs_aio_ctx *ctx;
3227         struct iov_iter saved_from = *from;
3228         size_t len = iov_iter_count(from);
3229         int rc;
3230
3231         /*
3232          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3233          * In this case, fall back to non-direct write function.
3234          * this could be improved by getting pages directly in ITER_KVEC
3235          */
3236         if (direct && iov_iter_is_kvec(from)) {
3237                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3238                 direct = false;
3239         }
3240
3241         rc = generic_write_checks(iocb, from);
3242         if (rc <= 0)
3243                 return rc;
3244
3245         cifs_sb = CIFS_FILE_SB(file);
3246         cfile = file->private_data;
3247         tcon = tlink_tcon(cfile->tlink);
3248
3249         if (!tcon->ses->server->ops->async_writev)
3250                 return -ENOSYS;
3251
3252         ctx = cifs_aio_ctx_alloc();
3253         if (!ctx)
3254                 return -ENOMEM;
3255
3256         ctx->cfile = cifsFileInfo_get(cfile);
3257
3258         if (!is_sync_kiocb(iocb))
3259                 ctx->iocb = iocb;
3260
3261         ctx->pos = iocb->ki_pos;
3262
3263         if (direct) {
3264                 ctx->direct_io = true;
3265                 ctx->iter = *from;
3266                 ctx->len = len;
3267         } else {
3268                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3269                 if (rc) {
3270                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3271                         return rc;
3272                 }
3273         }
3274
3275         /* grab a lock here due to read response handlers can access ctx */
3276         mutex_lock(&ctx->aio_mutex);
3277
3278         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3279                                   cfile, cifs_sb, &ctx->list, ctx);
3280
3281         /*
3282          * If at least one write was successfully sent, then discard any rc
3283          * value from the later writes. If the other write succeeds, then
3284          * we'll end up returning whatever was written. If it fails, then
3285          * we'll get a new rc value from that.
3286          */
3287         if (!list_empty(&ctx->list))
3288                 rc = 0;
3289
3290         mutex_unlock(&ctx->aio_mutex);
3291
3292         if (rc) {
3293                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3294                 return rc;
3295         }
3296
3297         if (!is_sync_kiocb(iocb)) {
3298                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3299                 return -EIOCBQUEUED;
3300         }
3301
3302         rc = wait_for_completion_killable(&ctx->done);
3303         if (rc) {
3304                 mutex_lock(&ctx->aio_mutex);
3305                 ctx->rc = rc = -EINTR;
3306                 total_written = ctx->total_len;
3307                 mutex_unlock(&ctx->aio_mutex);
3308         } else {
3309                 rc = ctx->rc;
3310                 total_written = ctx->total_len;
3311         }
3312
3313         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3314
3315         if (unlikely(!total_written))
3316                 return rc;
3317
3318         iocb->ki_pos += total_written;
3319         return total_written;
3320 }
3321
3322 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3323 {
3324         struct file *file = iocb->ki_filp;
3325
3326         cifs_revalidate_mapping(file->f_inode);
3327         return __cifs_writev(iocb, from, true);
3328 }
3329
3330 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3331 {
3332         return __cifs_writev(iocb, from, false);
3333 }
3334
3335 static ssize_t
3336 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3337 {
3338         struct file *file = iocb->ki_filp;
3339         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3340         struct inode *inode = file->f_mapping->host;
3341         struct cifsInodeInfo *cinode = CIFS_I(inode);
3342         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3343         ssize_t rc;
3344
3345         inode_lock(inode);
3346         /*
3347          * We need to hold the sem to be sure nobody modifies lock list
3348          * with a brlock that prevents writing.
3349          */
3350         down_read(&cinode->lock_sem);
3351
3352         rc = generic_write_checks(iocb, from);
3353         if (rc <= 0)
3354                 goto out;
3355
3356         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3357                                      server->vals->exclusive_lock_type, 0,
3358                                      NULL, CIFS_WRITE_OP))
3359                 rc = __generic_file_write_iter(iocb, from);
3360         else
3361                 rc = -EACCES;
3362 out:
3363         up_read(&cinode->lock_sem);
3364         inode_unlock(inode);
3365
3366         if (rc > 0)
3367                 rc = generic_write_sync(iocb, rc);
3368         return rc;
3369 }
3370
3371 ssize_t
3372 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3373 {
3374         struct inode *inode = file_inode(iocb->ki_filp);
3375         struct cifsInodeInfo *cinode = CIFS_I(inode);
3376         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3377         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3378                                                 iocb->ki_filp->private_data;
3379         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3380         ssize_t written;
3381
3382         written = cifs_get_writer(cinode);
3383         if (written)
3384                 return written;
3385
3386         if (CIFS_CACHE_WRITE(cinode)) {
3387                 if (cap_unix(tcon->ses) &&
3388                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3389                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3390                         written = generic_file_write_iter(iocb, from);
3391                         goto out;
3392                 }
3393                 written = cifs_writev(iocb, from);
3394                 goto out;
3395         }
3396         /*
3397          * For non-oplocked files in strict cache mode we need to write the data
3398          * to the server exactly from the pos to pos+len-1 rather than flush all
3399          * affected pages because it may cause a error with mandatory locks on
3400          * these pages but not on the region from pos to ppos+len-1.
3401          */
3402         written = cifs_user_writev(iocb, from);
3403         if (CIFS_CACHE_READ(cinode)) {
3404                 /*
3405                  * We have read level caching and we have just sent a write
3406                  * request to the server thus making data in the cache stale.
3407                  * Zap the cache and set oplock/lease level to NONE to avoid
3408                  * reading stale data from the cache. All subsequent read
3409                  * operations will read new data from the server.
3410                  */
3411                 cifs_zap_mapping(inode);
3412                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3413                          inode);
3414                 cinode->oplock = 0;
3415         }
3416 out:
3417         cifs_put_writer(cinode);
3418         return written;
3419 }
3420
3421 static struct cifs_readdata *
3422 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3423 {
3424         struct cifs_readdata *rdata;
3425
3426         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3427         if (rdata != NULL) {
3428                 rdata->pages = pages;
3429                 kref_init(&rdata->refcount);
3430                 INIT_LIST_HEAD(&rdata->list);
3431                 init_completion(&rdata->done);
3432                 INIT_WORK(&rdata->work, complete);
3433         }
3434
3435         return rdata;
3436 }
3437
3438 static struct cifs_readdata *
3439 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3440 {
3441         struct page **pages =
3442                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3443         struct cifs_readdata *ret = NULL;
3444
3445         if (pages) {
3446                 ret = cifs_readdata_direct_alloc(pages, complete);
3447                 if (!ret)
3448                         kfree(pages);
3449         }
3450
3451         return ret;
3452 }
3453
3454 void
3455 cifs_readdata_release(struct kref *refcount)
3456 {
3457         struct cifs_readdata *rdata = container_of(refcount,
3458                                         struct cifs_readdata, refcount);
3459 #ifdef CONFIG_CIFS_SMB_DIRECT
3460         if (rdata->mr) {
3461                 smbd_deregister_mr(rdata->mr);
3462                 rdata->mr = NULL;
3463         }
3464 #endif
3465         if (rdata->cfile)
3466                 cifsFileInfo_put(rdata->cfile);
3467
3468         kvfree(rdata->pages);
3469         kfree(rdata);
3470 }
3471
3472 static int
3473 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3474 {
3475         int rc = 0;
3476         struct page *page;
3477         unsigned int i;
3478
3479         for (i = 0; i < nr_pages; i++) {
3480                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3481                 if (!page) {
3482                         rc = -ENOMEM;
3483                         break;
3484                 }
3485                 rdata->pages[i] = page;
3486         }
3487
3488         if (rc) {
3489                 unsigned int nr_page_failed = i;
3490
3491                 for (i = 0; i < nr_page_failed; i++) {
3492                         put_page(rdata->pages[i]);
3493                         rdata->pages[i] = NULL;
3494                 }
3495         }
3496         return rc;
3497 }
3498
3499 static void
3500 cifs_uncached_readdata_release(struct kref *refcount)
3501 {
3502         struct cifs_readdata *rdata = container_of(refcount,
3503                                         struct cifs_readdata, refcount);
3504         unsigned int i;
3505
3506         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3507         for (i = 0; i < rdata->nr_pages; i++) {
3508                 put_page(rdata->pages[i]);
3509         }
3510         cifs_readdata_release(refcount);
3511 }
3512
3513 /**
3514  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3515  * @rdata:      the readdata response with list of pages holding data
3516  * @iter:       destination for our data
3517  *
3518  * This function copies data from a list of pages in a readdata response into
3519  * an array of iovecs. It will first calculate where the data should go
3520  * based on the info in the readdata and then copy the data into that spot.
3521  */
3522 static int
3523 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3524 {
3525         size_t remaining = rdata->got_bytes;
3526         unsigned int i;
3527
3528         for (i = 0; i < rdata->nr_pages; i++) {
3529                 struct page *page = rdata->pages[i];
3530                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3531                 size_t written;
3532
3533                 if (unlikely(iov_iter_is_pipe(iter))) {
3534                         void *addr = kmap_atomic(page);
3535
3536                         written = copy_to_iter(addr, copy, iter);
3537                         kunmap_atomic(addr);
3538                 } else
3539                         written = copy_page_to_iter(page, 0, copy, iter);
3540                 remaining -= written;
3541                 if (written < copy && iov_iter_count(iter) > 0)
3542                         break;
3543         }
3544         return remaining ? -EFAULT : 0;
3545 }
3546
3547 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3548
3549 static void
3550 cifs_uncached_readv_complete(struct work_struct *work)
3551 {
3552         struct cifs_readdata *rdata = container_of(work,
3553                                                 struct cifs_readdata, work);
3554
3555         complete(&rdata->done);
3556         collect_uncached_read_data(rdata->ctx);
3557         /* the below call can possibly free the last ref to aio ctx */
3558         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3559 }
3560
3561 static int
3562 uncached_fill_pages(struct TCP_Server_Info *server,
3563                     struct cifs_readdata *rdata, struct iov_iter *iter,
3564                     unsigned int len)
3565 {
3566         int result = 0;
3567         unsigned int i;
3568         unsigned int nr_pages = rdata->nr_pages;
3569         unsigned int page_offset = rdata->page_offset;
3570
3571         rdata->got_bytes = 0;
3572         rdata->tailsz = PAGE_SIZE;
3573         for (i = 0; i < nr_pages; i++) {
3574                 struct page *page = rdata->pages[i];
3575                 size_t n;
3576                 unsigned int segment_size = rdata->pagesz;
3577
3578                 if (i == 0)
3579                         segment_size -= page_offset;
3580                 else
3581                         page_offset = 0;
3582
3583
3584                 if (len <= 0) {
3585                         /* no need to hold page hostage */
3586                         rdata->pages[i] = NULL;
3587                         rdata->nr_pages--;
3588                         put_page(page);
3589                         continue;
3590                 }
3591
3592                 n = len;
3593                 if (len >= segment_size)
3594                         /* enough data to fill the page */
3595                         n = segment_size;
3596                 else
3597                         rdata->tailsz = len;
3598                 len -= n;
3599
3600                 if (iter)
3601                         result = copy_page_from_iter(
3602                                         page, page_offset, n, iter);
3603 #ifdef CONFIG_CIFS_SMB_DIRECT
3604                 else if (rdata->mr)
3605                         result = n;
3606 #endif
3607                 else
3608                         result = cifs_read_page_from_socket(
3609                                         server, page, page_offset, n);
3610                 if (result < 0)
3611                         break;
3612
3613                 rdata->got_bytes += result;
3614         }
3615
3616         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
3617                                                 rdata->got_bytes : result;
3618 }
3619
3620 static int
3621 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3622                               struct cifs_readdata *rdata, unsigned int len)
3623 {
3624         return uncached_fill_pages(server, rdata, NULL, len);
3625 }
3626
3627 static int
3628 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3629                               struct cifs_readdata *rdata,
3630                               struct iov_iter *iter)
3631 {
3632         return uncached_fill_pages(server, rdata, iter, iter->count);
3633 }
3634
3635 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3636                         struct list_head *rdata_list,
3637                         struct cifs_aio_ctx *ctx)
3638 {
3639         unsigned int rsize;
3640         struct cifs_credits credits;
3641         int rc;
3642         struct TCP_Server_Info *server;
3643
3644         /* XXX: should we pick a new channel here? */
3645         server = rdata->server;
3646
3647         do {
3648                 if (rdata->cfile->invalidHandle) {
3649                         rc = cifs_reopen_file(rdata->cfile, true);
3650                         if (rc == -EAGAIN)
3651                                 continue;
3652                         else if (rc)
3653                                 break;
3654                 }
3655
3656                 /*
3657                  * Wait for credits to resend this rdata.
3658                  * Note: we are attempting to resend the whole rdata not in
3659                  * segments
3660                  */
3661                 do {
3662                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3663                                                 &rsize, &credits);
3664
3665                         if (rc)
3666                                 goto fail;
3667
3668                         if (rsize < rdata->bytes) {
3669                                 add_credits_and_wake_if(server, &credits, 0);
3670                                 msleep(1000);
3671                         }
3672                 } while (rsize < rdata->bytes);
3673                 rdata->credits = credits;
3674
3675                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3676                 if (!rc) {
3677                         if (rdata->cfile->invalidHandle)
3678                                 rc = -EAGAIN;
3679                         else {
3680 #ifdef CONFIG_CIFS_SMB_DIRECT
3681                                 if (rdata->mr) {
3682                                         rdata->mr->need_invalidate = true;
3683                                         smbd_deregister_mr(rdata->mr);
3684                                         rdata->mr = NULL;
3685                                 }
3686 #endif
3687                                 rc = server->ops->async_readv(rdata);
3688                         }
3689                 }
3690
3691                 /* If the read was successfully sent, we are done */
3692                 if (!rc) {
3693                         /* Add to aio pending list */
3694                         list_add_tail(&rdata->list, rdata_list);
3695                         return 0;
3696                 }
3697
3698                 /* Roll back credits and retry if needed */
3699                 add_credits_and_wake_if(server, &rdata->credits, 0);
3700         } while (rc == -EAGAIN);
3701
3702 fail:
3703         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3704         return rc;
3705 }
3706
3707 static int
3708 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3709                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3710                      struct cifs_aio_ctx *ctx)
3711 {
3712         struct cifs_readdata *rdata;
3713         unsigned int npages, rsize;
3714         struct cifs_credits credits_on_stack;
3715         struct cifs_credits *credits = &credits_on_stack;
3716         size_t cur_len;
3717         int rc;
3718         pid_t pid;
3719         struct TCP_Server_Info *server;
3720         struct page **pagevec;
3721         size_t start;
3722         struct iov_iter direct_iov = ctx->iter;
3723
3724         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3725
3726         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3727                 pid = open_file->pid;
3728         else
3729                 pid = current->tgid;
3730
3731         if (ctx->direct_io)
3732                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3733
3734         do {
3735                 if (open_file->invalidHandle) {
3736                         rc = cifs_reopen_file(open_file, true);
3737                         if (rc == -EAGAIN)
3738                                 continue;
3739                         else if (rc)
3740                                 break;
3741                 }
3742
3743                 if (cifs_sb->ctx->rsize == 0)
3744                         cifs_sb->ctx->rsize =
3745                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3746                                                              cifs_sb->ctx);
3747
3748                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3749                                                    &rsize, credits);
3750                 if (rc)
3751                         break;
3752
3753                 cur_len = min_t(const size_t, len, rsize);
3754
3755                 if (ctx->direct_io) {
3756                         ssize_t result;
3757
3758                         result = iov_iter_get_pages_alloc(
3759                                         &direct_iov, &pagevec,
3760                                         cur_len, &start);
3761                         if (result < 0) {
3762                                 cifs_dbg(VFS,
3763                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3764                                          result, iov_iter_type(&direct_iov),
3765                                          direct_iov.iov_offset,
3766                                          direct_iov.count);
3767                                 dump_stack();
3768
3769                                 rc = result;
3770                                 add_credits_and_wake_if(server, credits, 0);
3771                                 break;
3772                         }
3773                         cur_len = (size_t)result;
3774                         iov_iter_advance(&direct_iov, cur_len);
3775
3776                         rdata = cifs_readdata_direct_alloc(
3777                                         pagevec, cifs_uncached_readv_complete);
3778                         if (!rdata) {
3779                                 add_credits_and_wake_if(server, credits, 0);
3780                                 rc = -ENOMEM;
3781                                 break;
3782                         }
3783
3784                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3785                         rdata->page_offset = start;
3786                         rdata->tailsz = npages > 1 ?
3787                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3788                                 cur_len;
3789
3790                 } else {
3791
3792                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3793                         /* allocate a readdata struct */
3794                         rdata = cifs_readdata_alloc(npages,
3795                                             cifs_uncached_readv_complete);
3796                         if (!rdata) {
3797                                 add_credits_and_wake_if(server, credits, 0);
3798                                 rc = -ENOMEM;
3799                                 break;
3800                         }
3801
3802                         rc = cifs_read_allocate_pages(rdata, npages);
3803                         if (rc) {
3804                                 kvfree(rdata->pages);
3805                                 kfree(rdata);
3806                                 add_credits_and_wake_if(server, credits, 0);
3807                                 break;
3808                         }
3809
3810                         rdata->tailsz = PAGE_SIZE;
3811                 }
3812
3813                 rdata->server = server;
3814                 rdata->cfile = cifsFileInfo_get(open_file);
3815                 rdata->nr_pages = npages;
3816                 rdata->offset = offset;
3817                 rdata->bytes = cur_len;
3818                 rdata->pid = pid;
3819                 rdata->pagesz = PAGE_SIZE;
3820                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3821                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3822                 rdata->credits = credits_on_stack;
3823                 rdata->ctx = ctx;
3824                 kref_get(&ctx->refcount);
3825
3826                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3827
3828                 if (!rc) {
3829                         if (rdata->cfile->invalidHandle)
3830                                 rc = -EAGAIN;
3831                         else
3832                                 rc = server->ops->async_readv(rdata);
3833                 }
3834
3835                 if (rc) {
3836                         add_credits_and_wake_if(server, &rdata->credits, 0);
3837                         kref_put(&rdata->refcount,
3838                                 cifs_uncached_readdata_release);
3839                         if (rc == -EAGAIN) {
3840                                 iov_iter_revert(&direct_iov, cur_len);
3841                                 continue;
3842                         }
3843                         break;
3844                 }
3845
3846                 list_add_tail(&rdata->list, rdata_list);
3847                 offset += cur_len;
3848                 len -= cur_len;
3849         } while (len > 0);
3850
3851         return rc;
3852 }
3853
3854 static void
3855 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3856 {
3857         struct cifs_readdata *rdata, *tmp;
3858         struct iov_iter *to = &ctx->iter;
3859         struct cifs_sb_info *cifs_sb;
3860         int rc;
3861
3862         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3863
3864         mutex_lock(&ctx->aio_mutex);
3865
3866         if (list_empty(&ctx->list)) {
3867                 mutex_unlock(&ctx->aio_mutex);
3868                 return;
3869         }
3870
3871         rc = ctx->rc;
3872         /* the loop below should proceed in the order of increasing offsets */
3873 again:
3874         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3875                 if (!rc) {
3876                         if (!try_wait_for_completion(&rdata->done)) {
3877                                 mutex_unlock(&ctx->aio_mutex);
3878                                 return;
3879                         }
3880
3881                         if (rdata->result == -EAGAIN) {
3882                                 /* resend call if it's a retryable error */
3883                                 struct list_head tmp_list;
3884                                 unsigned int got_bytes = rdata->got_bytes;
3885
3886                                 list_del_init(&rdata->list);
3887                                 INIT_LIST_HEAD(&tmp_list);
3888
3889                                 /*
3890                                  * Got a part of data and then reconnect has
3891                                  * happened -- fill the buffer and continue
3892                                  * reading.
3893                                  */
3894                                 if (got_bytes && got_bytes < rdata->bytes) {
3895                                         rc = 0;
3896                                         if (!ctx->direct_io)
3897                                                 rc = cifs_readdata_to_iov(rdata, to);
3898                                         if (rc) {
3899                                                 kref_put(&rdata->refcount,
3900                                                         cifs_uncached_readdata_release);
3901                                                 continue;
3902                                         }
3903                                 }
3904
3905                                 if (ctx->direct_io) {
3906                                         /*
3907                                          * Re-use rdata as this is a
3908                                          * direct I/O
3909                                          */
3910                                         rc = cifs_resend_rdata(
3911                                                 rdata,
3912                                                 &tmp_list, ctx);
3913                                 } else {
3914                                         rc = cifs_send_async_read(
3915                                                 rdata->offset + got_bytes,
3916                                                 rdata->bytes - got_bytes,
3917                                                 rdata->cfile, cifs_sb,
3918                                                 &tmp_list, ctx);
3919
3920                                         kref_put(&rdata->refcount,
3921                                                 cifs_uncached_readdata_release);
3922                                 }
3923
3924                                 list_splice(&tmp_list, &ctx->list);
3925
3926                                 goto again;
3927                         } else if (rdata->result)
3928                                 rc = rdata->result;
3929                         else if (!ctx->direct_io)
3930                                 rc = cifs_readdata_to_iov(rdata, to);
3931
3932                         /* if there was a short read -- discard anything left */
3933                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3934                                 rc = -ENODATA;
3935
3936                         ctx->total_len += rdata->got_bytes;
3937                 }
3938                 list_del_init(&rdata->list);
3939                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3940         }
3941
3942         if (!ctx->direct_io)
3943                 ctx->total_len = ctx->len - iov_iter_count(to);
3944
3945         /* mask nodata case */
3946         if (rc == -ENODATA)
3947                 rc = 0;
3948
3949         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3950
3951         mutex_unlock(&ctx->aio_mutex);
3952
3953         if (ctx->iocb && ctx->iocb->ki_complete)
3954                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3955         else
3956                 complete(&ctx->done);
3957 }
3958
/*
 * __cifs_readv - shared implementation behind cifs_direct_readv() and
 * cifs_user_readv()
 * @iocb:   kiocb naming the file (ki_filp) and the start position (ki_pos)
 * @to:     destination iterator; its byte count is the requested length
 * @direct: true to read straight into @to (direct I/O), false to let
 *          setup_aio_ctx_iter() prepare the iterator kept in the aio context
 *
 * Allocates a cifs_aio_ctx, queues the read through cifs_send_async_read()
 * and then either returns -EIOCBQUEUED (non-sync kiocb) or waits killably on
 * ctx->done (sync kiocb).
 *
 * Returns the number of bytes read when that is non-zero (iocb->ki_pos is
 * advanced by the same amount), 0 for a zero-length request, or a negative
 * errno: -ENOSYS when the server ops have no async_readv, -ENOMEM when the
 * context cannot be allocated, -EAGAIN when writing back the range fails on
 * the direct path, -EINTR when the wait is interrupted before any data was
 * counted, or whatever setup_aio_ctx_iter()/cifs_send_async_read() reported.
 */
3959 static ssize_t __cifs_readv(
3960         struct kiocb *iocb, struct iov_iter *to, bool direct)
3961 {
3962         size_t len;
3963         struct file *file = iocb->ki_filp;
3964         struct cifs_sb_info *cifs_sb;
3965         struct cifsFileInfo *cfile;
3966         struct cifs_tcon *tcon;
3967         ssize_t rc, total_read = 0;
3968         loff_t offset = iocb->ki_pos;
3969         struct cifs_aio_ctx *ctx;
3970
3971         /*
3972          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3973          * fall back to data copy read path
3974          * this could be improved by getting pages directly in ITER_KVEC
3975          */
3976         if (direct && iov_iter_is_kvec(to)) {
3977                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3978                 direct = false;
3979         }
3980
3981         len = iov_iter_count(to);
3982         if (!len)
3983                 return 0;
3984
3985         cifs_sb = CIFS_FILE_SB(file);
3986         cfile = file->private_data;
3987         tcon = tlink_tcon(cfile->tlink);
3988
3989         if (!tcon->ses->server->ops->async_readv)
3990                 return -ENOSYS;
3991
             /* a write-only open is only logged here; the read is still attempted */
3992         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3993                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3994
3995         ctx = cifs_aio_ctx_alloc();
3996         if (!ctx)
3997                 return -ENOMEM;
3998
3999         ctx->cfile = cifsFileInfo_get(cfile);
4000
             /* ctx->iocb stays unset for synchronous callers, which wait on ctx->done below */
4001         if (!is_sync_kiocb(iocb))
4002                 ctx->iocb = iocb;
4003
4004         if (iter_is_iovec(to))
4005                 ctx->should_dirty = true;
4006
4007         if (direct) {
4008                 ctx->pos = offset;
4009                 ctx->direct_io = true;
4010                 ctx->iter = *to;
4011                 ctx->len = len;
4012         } else {
4013                 rc = setup_aio_ctx_iter(ctx, to, READ);
4014                 if (rc) {
4015                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4016                         return rc;
4017                 }
                     /* from here on use the length recorded in the context */
4018                 len = ctx->len;
4019         }
4020
             /*
              * Direct reads first write back and wait on the page cache over
              * the requested range; any failure is reported as -EAGAIN.
              */
4021         if (direct) {
4022                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4023                                                   offset, offset + len - 1);
4024                 if (rc) {
4025                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4026                         return -EAGAIN;
4027                 }
4028         }
4029
4030         /* grab a lock here due to read response handlers can access ctx */
4031         mutex_lock(&ctx->aio_mutex);
4032
4033         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4034
4035         /* if at least one read request send succeeded, then reset rc */
4036         if (!list_empty(&ctx->list))
4037                 rc = 0;
4038
4039         mutex_unlock(&ctx->aio_mutex);
4040
4041         if (rc) {
4042                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4043                 return rc;
4044         }
4045
             /* async caller: drop this function's ctx reference and report the request as queued */
4046         if (!is_sync_kiocb(iocb)) {
4047                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4048                 return -EIOCBQUEUED;
4049         }
4050
4051         rc = wait_for_completion_killable(&ctx->done);
4052         if (rc) {
                     /* interrupted wait: record -EINTR in ctx under the mutex and sample what was counted so far */
4053                 mutex_lock(&ctx->aio_mutex);
4054                 ctx->rc = rc = -EINTR;
4055                 total_read = ctx->total_len;
4056                 mutex_unlock(&ctx->aio_mutex);
4057         } else {
4058                 rc = ctx->rc;
4059                 total_read = ctx->total_len;
4060         }
4061
4062         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4063
             /* a non-zero byte count takes precedence over any error code */
4064         if (total_read) {
4065                 iocb->ki_pos += total_read;
4066                 return total_read;
4067         }
4068         return rc;
4069 }
4070
4071 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4072 {
4073         return __cifs_readv(iocb, to, true);
4074 }
4075
4076 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4077 {
4078         return __cifs_readv(iocb, to, false);
4079 }
4080
4081 ssize_t
4082 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4083 {
4084         struct inode *inode = file_inode(iocb->ki_filp);
4085         struct cifsInodeInfo *cinode = CIFS_I(inode);
4086         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4087         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4088                                                 iocb->ki_filp->private_data;
4089         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4090         int rc = -EACCES;
4091
4092         /*
4093          * In strict cache mode we need to read from the server all the time
4094          * if we don't have level II oplock because the server can delay mtime
4095          * change - so we can't make a decision about inode invalidating.
4096          * And we can also fail with pagereading if there are mandatory locks
4097          * on pages affected by this read but not on the region from pos to
4098          * pos+len-1.
4099          */
4100         if (!CIFS_CACHE_READ(cinode))
4101                 return cifs_user_readv(iocb, to);
4102
4103         if (cap_unix(tcon->ses) &&
4104             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4105             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4106                 return generic_file_read_iter(iocb, to);
4107
4108         /*
4109          * We need to hold the sem to be sure nobody modifies lock list
4110          * with a brlock that prevents reading.
4111          */
4112         down_read(&cinode->lock_sem);
4113         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4114                                      tcon->ses->server->vals->shared_lock_type,
4115                                      0, NULL, CIFS_READ_OP))
4116                 rc = generic_file_read_iter(iocb, to);
4117         up_read(&cinode->lock_sem);
4118         return rc;
4119 }
4120
4121 static ssize_t
4122 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4123 {
4124         int rc = -EACCES;
4125         unsigned int bytes_read = 0;
4126         unsigned int total_read;
4127         unsigned int current_read_size;
4128         unsigned int rsize;
4129         struct cifs_sb_info *cifs_sb;
4130         struct cifs_tcon *tcon;
4131         struct TCP_Server_Info *server;
4132         unsigned int xid;
4133         char *cur_offset;
4134         struct cifsFileInfo *open_file;
4135         struct cifs_io_parms io_parms = {0};
4136         int buf_type = CIFS_NO_BUFFER;
4137         __u32 pid;
4138
4139         xid = get_xid();
4140         cifs_sb = CIFS_FILE_SB(file);
4141
4142         /* FIXME: set up handlers for larger reads and/or convert to async */
4143         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4144
4145         if (file->private_data == NULL) {
4146                 rc = -EBADF;
4147                 free_xid(xid);
4148                 return rc;
4149         }
4150         open_file = file->private_data;
4151         tcon = tlink_tcon(open_file->tlink);
4152         server = cifs_pick_channel(tcon->ses);
4153
4154         if (!server->ops->sync_read) {
4155                 free_xid(xid);
4156                 return -ENOSYS;
4157         }
4158
4159         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4160                 pid = open_file->pid;
4161         else
4162                 pid = current->tgid;
4163
4164         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4165                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4166
4167         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4168              total_read += bytes_read, cur_offset += bytes_read) {
4169                 do {
4170                         current_read_size = min_t(uint, read_size - total_read,
4171                                                   rsize);
4172                         /*
4173                          * For windows me and 9x we do not want to request more
4174                          * than it negotiated since it will refuse the read
4175                          * then.
4176                          */
4177                         if (!(tcon->ses->capabilities &
4178                                 tcon->ses->server->vals->cap_large_files)) {
4179                                 current_read_size = min_t(uint,
4180                                         current_read_size, CIFSMaxBufSize);
4181                         }
4182                         if (open_file->invalidHandle) {
4183                                 rc = cifs_reopen_file(open_file, true);
4184                                 if (rc != 0)
4185                                         break;
4186                         }
4187                         io_parms.pid = pid;
4188                         io_parms.tcon = tcon;
4189                         io_parms.offset = *offset;
4190                         io_parms.length = current_read_size;
4191                         io_parms.server = server;
4192                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4193                                                     &bytes_read, &cur_offset,
4194                                                     &buf_type);
4195                 } while (rc == -EAGAIN);
4196
4197                 if (rc || (bytes_read == 0)) {
4198                         if (total_read) {
4199                                 break;
4200                         } else {
4201                                 free_xid(xid);
4202                                 return rc;
4203                         }
4204                 } else {
4205                         cifs_stats_bytes_read(tcon, total_read);
4206                         *offset += bytes_read;
4207                 }
4208         }
4209         free_xid(xid);
4210         return total_read;
4211 }
4212
4213 /*
4214  * If the page is mmap'ed into a process' page tables, then we need to make
4215  * sure that it doesn't change while being written back.
4216  */
4217 static vm_fault_t
4218 cifs_page_mkwrite(struct vm_fault *vmf)
4219 {
4220         struct page *page = vmf->page;
4221         struct file *file = vmf->vma->vm_file;
4222         struct inode *inode = file_inode(file);
4223
4224         cifs_fscache_wait_on_page_write(inode, page);
4225
4226         lock_page(page);
4227         return VM_FAULT_LOCKED;
4228 }
4229
4230 static const struct vm_operations_struct cifs_file_vm_ops = {
4231         .fault = filemap_fault,
4232         .map_pages = filemap_map_pages,
4233         .page_mkwrite = cifs_page_mkwrite,
4234 };
4235
4236 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4237 {
4238         int xid, rc = 0;
4239         struct inode *inode = file_inode(file);
4240
4241         xid = get_xid();
4242
4243         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4244                 rc = cifs_zap_mapping(inode);
4245         if (!rc)
4246                 rc = generic_file_mmap(file, vma);
4247         if (!rc)
4248                 vma->vm_ops = &cifs_file_vm_ops;
4249
4250         free_xid(xid);
4251         return rc;
4252 }
4253
4254 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4255 {
4256         int rc, xid;
4257
4258         xid = get_xid();
4259
4260         rc = cifs_revalidate_file(file);
4261         if (rc)
4262                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4263                          rc);
4264         if (!rc)
4265                 rc = generic_file_mmap(file, vma);
4266         if (!rc)
4267                 vma->vm_ops = &cifs_file_vm_ops;
4268
4269         free_xid(xid);
4270         return rc;
4271 }
4272
4273 static void
4274 cifs_readv_complete(struct work_struct *work)
4275 {
4276         unsigned int i, got_bytes;
4277         struct cifs_readdata *rdata = container_of(work,
4278                                                 struct cifs_readdata, work);
4279
4280         got_bytes = rdata->got_bytes;
4281         for (i = 0; i < rdata->nr_pages; i++) {
4282                 struct page *page = rdata->pages[i];
4283
4284                 lru_cache_add(page);
4285
4286                 if (rdata->result == 0 ||
4287                     (rdata->result == -EAGAIN && got_bytes)) {
4288                         flush_dcache_page(page);
4289                         SetPageUptodate(page);
4290                 } else
4291                         SetPageError(page);
4292
4293                 unlock_page(page);
4294
4295                 if (rdata->result == 0 ||
4296                     (rdata->result == -EAGAIN && got_bytes))
4297                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4298                 else
4299                         cifs_fscache_uncache_page(rdata->mapping->host, page);
4300
4301                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4302
4303                 put_page(page);
4304                 rdata->pages[i] = NULL;
4305         }
4306         kref_put(&rdata->refcount, cifs_readdata_release);
4307 }
4308
/*
 * readpages_fill_pages - distribute @len bytes of read data over rdata->pages
 * @server: connection handed to cifs_read_page_from_socket() when reading
 *          from the socket
 * @rdata:  read request; its pages[], nr_pages, got_bytes and tailsz are
 *          updated here
 * @iter:   when non-NULL the data is copied from this iterator with
 *          copy_page_from_iter(); when NULL it is read from the socket
 *          (or, with CONFIG_CIFS_SMB_DIRECT and rdata->mr set, only counted)
 * @len:    number of data bytes available
 *
 * Pages fully covered by @len receive a whole pagesz chunk (less page_offset
 * for the first page). A partially covered page is zero-filled past the data
 * and its length is recorded in rdata->tailsz. Pages for which no data is
 * left are released immediately and removed from rdata: those beyond the
 * server's (probable) EOF are zeroed and marked uptodate, the others are
 * simply unlocked.
 *
 * Returns rdata->got_bytes when something was received and the last result
 * was not -ECONNABORTED; otherwise the last result.
 */
4309 static int
4310 readpages_fill_pages(struct TCP_Server_Info *server,
4311                      struct cifs_readdata *rdata, struct iov_iter *iter,
4312                      unsigned int len)
4313 {
4314         int result = 0;
4315         unsigned int i;
4316         u64 eof;
4317         pgoff_t eof_index;
             /* snapshot: the loop bound stays fixed while rdata->nr_pages is decremented below */
4318         unsigned int nr_pages = rdata->nr_pages;
4319         unsigned int page_offset = rdata->page_offset;
4320
4321         /* determine the eof that the server (probably) has */
4322         eof = CIFS_I(rdata->mapping->host)->server_eof;
4323         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4324         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4325
4326         rdata->got_bytes = 0;
4327         rdata->tailsz = PAGE_SIZE;
4328         for (i = 0; i < nr_pages; i++) {
4329                 struct page *page = rdata->pages[i];
4330                 unsigned int to_read = rdata->pagesz;
4331                 size_t n;
4332
                     /* only the first page starts at a non-zero offset */
4333                 if (i == 0)
4334                         to_read -= page_offset;
4335                 else
4336                         page_offset = 0;
4337
4338                 n = to_read;
4339
4340                 if (len >= to_read) {
4341                         len -= to_read;
4342                 } else if (len > 0) {
4343                         /* enough for partial page, fill and zero the rest */
4344                         zero_user(page, len + page_offset, to_read - len);
4345                         n = rdata->tailsz = len;
4346                         len = 0;
4347                 } else if (page->index > eof_index) {
4348                         /*
4349                          * The VFS will not try to do readahead past the
4350                          * i_size, but it's possible that we have outstanding
4351                          * writes with gaps in the middle and the i_size hasn't
4352                          * caught up yet. Populate those with zeroed out pages
4353                          * to prevent the VFS from repeatedly attempting to
4354                          * fill them until the writes are flushed.
4355                          */
4356                         zero_user(page, 0, PAGE_SIZE);
4357                         lru_cache_add(page);
4358                         flush_dcache_page(page);
4359                         SetPageUptodate(page);
4360                         unlock_page(page);
4361                         put_page(page);
4362                         rdata->pages[i] = NULL;
4363                         rdata->nr_pages--;
4364                         continue;
4365                 } else {
4366                         /* no need to hold page hostage */
4367                         lru_cache_add(page);
4368                         unlock_page(page);
4369                         put_page(page);
4370                         rdata->pages[i] = NULL;
4371                         rdata->nr_pages--;
4372                         continue;
4373                 }
4374
                     /* n bytes go into this page, starting at page_offset */
4375                 if (iter)
4376                         result = copy_page_from_iter(
4377                                         page, page_offset, n, iter);
4378 #ifdef CONFIG_CIFS_SMB_DIRECT
4379                 else if (rdata->mr)
4380                         result = n;
4381 #endif
4382                 else
4383                         result = cifs_read_page_from_socket(
4384                                         server, page, page_offset, n);
4385                 if (result < 0)
4386                         break;
4387
4388                 rdata->got_bytes += result;
4389         }
4390
             /* report partial progress unless the connection was aborted */
4391         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
4392                                                 rdata->got_bytes : result;
4393 }
4394
4395 static int
4396 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4397                                struct cifs_readdata *rdata, unsigned int len)
4398 {
4399         return readpages_fill_pages(server, rdata, NULL, len);
4400 }
4401
4402 static int
4403 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4404                                struct cifs_readdata *rdata,
4405                                struct iov_iter *iter)
4406 {
4407         return readpages_fill_pages(server, rdata, iter, iter->count);
4408 }
4409
4410 static int
4411 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4412                     unsigned int rsize, struct list_head *tmplist,
4413                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4414 {
4415         struct page *page, *tpage;
4416         unsigned int expected_index;
4417         int rc;
4418         gfp_t gfp = readahead_gfp_mask(mapping);
4419
4420         INIT_LIST_HEAD(tmplist);
4421
4422         page = lru_to_page(page_list);
4423
4424         /*
4425          * Lock the page and put it in the cache. Since no one else
4426          * should have access to this page, we're safe to simply set
4427          * PG_locked without checking it first.
4428          */
4429         __SetPageLocked(page);
4430         rc = add_to_page_cache_locked(page, mapping,
4431                                       page->index, gfp);
4432
4433         /* give up if we can't stick it in the cache */
4434         if (rc) {
4435                 __ClearPageLocked(page);
4436                 return rc;
4437         }
4438
4439         /* move first page to the tmplist */
4440         *offset = (loff_t)page->index << PAGE_SHIFT;
4441         *bytes = PAGE_SIZE;
4442         *nr_pages = 1;
4443         list_move_tail(&page->lru, tmplist);
4444
4445         /* now try and add more pages onto the request */
4446         expected_index = page->index + 1;
4447         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4448                 /* discontinuity ? */
4449                 if (page->index != expected_index)
4450                         break;
4451
4452                 /* would this page push the read over the rsize? */
4453                 if (*bytes + PAGE_SIZE > rsize)
4454                         break;
4455
4456                 __SetPageLocked(page);
4457                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4458                 if (rc) {
4459                         __ClearPageLocked(page);
4460                         break;
4461                 }
4462                 list_move_tail(&page->lru, tmplist);
4463                 (*bytes) += PAGE_SIZE;
4464                 expected_index++;
4465                 (*nr_pages)++;
4466         }
4467         return rc;
4468 }
4469
4470 static int cifs_readpages(struct file *file, struct address_space *mapping,
4471         struct list_head *page_list, unsigned num_pages)
4472 {
4473         int rc;
4474         int err = 0;
4475         struct list_head tmplist;
4476         struct cifsFileInfo *open_file = file->private_data;
4477         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4478         struct TCP_Server_Info *server;
4479         pid_t pid;
4480         unsigned int xid;
4481
4482         xid = get_xid();
4483         /*
4484          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4485          * immediately if the cookie is negative
4486          *
4487          * After this point, every page in the list might have PG_fscache set,
4488          * so we will need to clean that up off of every page we don't use.
4489          */
4490         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4491                                          &num_pages);
4492         if (rc == 0) {
4493                 free_xid(xid);
4494                 return rc;
4495         }
4496
4497         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4498                 pid = open_file->pid;
4499         else
4500                 pid = current->tgid;
4501
4502         rc = 0;
4503         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4504
4505         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4506                  __func__, file, mapping, num_pages);
4507
4508         /*
4509          * Start with the page at end of list and move it to private
4510          * list. Do the same with any following pages until we hit
4511          * the rsize limit, hit an index discontinuity, or run out of
4512          * pages. Issue the async read and then start the loop again
4513          * until the list is empty.
4514          *
4515          * Note that list order is important. The page_list is in
4516          * the order of declining indexes. When we put the pages in
4517          * the rdata->pages, then we want them in increasing order.
4518          */
4519         while (!list_empty(page_list) && !err) {
4520                 unsigned int i, nr_pages, bytes, rsize;
4521                 loff_t offset;
4522                 struct page *page, *tpage;
4523                 struct cifs_readdata *rdata;
4524                 struct cifs_credits credits_on_stack;
4525                 struct cifs_credits *credits = &credits_on_stack;
4526
4527                 if (open_file->invalidHandle) {
4528                         rc = cifs_reopen_file(open_file, true);
4529                         if (rc == -EAGAIN)
4530                                 continue;
4531                         else if (rc)
4532                                 break;
4533                 }
4534
4535                 if (cifs_sb->ctx->rsize == 0)
4536                         cifs_sb->ctx->rsize =
4537                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4538                                                              cifs_sb->ctx);
4539
4540                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4541                                                    &rsize, credits);
4542                 if (rc)
4543                         break;
4544
4545                 /*
4546                  * Give up immediately if rsize is too small to read an entire
4547                  * page. The VFS will fall back to readpage. We should never
4548                  * reach this point however since we set ra_pages to 0 when the
4549                  * rsize is smaller than a cache page.
4550                  */
4551                 if (unlikely(rsize < PAGE_SIZE)) {
4552                         add_credits_and_wake_if(server, credits, 0);
4553                         free_xid(xid);
4554                         return 0;
4555                 }
4556
4557                 nr_pages = 0;
4558                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4559                                          &nr_pages, &offset, &bytes);
4560                 if (!nr_pages) {
4561                         add_credits_and_wake_if(server, credits, 0);
4562                         break;
4563                 }
4564
4565                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4566                 if (!rdata) {
4567                         /* best to give up if we're out of mem */
4568                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4569                                 list_del(&page->lru);
4570                                 lru_cache_add(page);
4571                                 unlock_page(page);
4572                                 put_page(page);
4573                         }
4574                         rc = -ENOMEM;
4575                         add_credits_and_wake_if(server, credits, 0);
4576                         break;
4577                 }
4578
4579                 rdata->cfile = cifsFileInfo_get(open_file);
4580                 rdata->server = server;
4581                 rdata->mapping = mapping;
4582                 rdata->offset = offset;
4583                 rdata->bytes = bytes;
4584                 rdata->pid = pid;
4585                 rdata->pagesz = PAGE_SIZE;
4586                 rdata->tailsz = PAGE_SIZE;
4587                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4588                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4589                 rdata->credits = credits_on_stack;
4590
4591                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4592                         list_del(&page->lru);
4593                         rdata->pages[rdata->nr_pages++] = page;
4594                 }
4595
4596                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4597
4598                 if (!rc) {
4599                         if (rdata->cfile->invalidHandle)
4600                                 rc = -EAGAIN;
4601                         else
4602                                 rc = server->ops->async_readv(rdata);
4603                 }
4604
4605                 if (rc) {
4606                         add_credits_and_wake_if(server, &rdata->credits, 0);
4607                         for (i = 0; i < rdata->nr_pages; i++) {
4608                                 page = rdata->pages[i];
4609                                 lru_cache_add(page);
4610                                 unlock_page(page);
4611                                 put_page(page);
4612                         }
4613                         /* Fallback to the readpage in error/reconnect cases */
4614                         kref_put(&rdata->refcount, cifs_readdata_release);
4615                         break;
4616                 }
4617
4618                 kref_put(&rdata->refcount, cifs_readdata_release);
4619         }
4620
4621         /* Any pages that have been shown to fscache but didn't get added to
4622          * the pagecache must be uncached before they get returned to the
4623          * allocator.
4624          */
4625         cifs_fscache_readpages_cancel(mapping->host, page_list);
4626         free_xid(xid);
4627         return rc;
4628 }
4629
4630 /*
4631  * cifs_readpage_worker must be called with the page pinned
4632  */
4633 static int cifs_readpage_worker(struct file *file, struct page *page,
4634         loff_t *poffset)
4635 {
4636         char *read_data;
4637         int rc;
4638
4639         /* Is the page cached? */
4640         rc = cifs_readpage_from_fscache(file_inode(file), page);
4641         if (rc == 0)
4642                 goto read_complete;
4643
4644         read_data = kmap(page);
4645         /* for reads over a certain size could initiate async read ahead */
4646
4647         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4648
4649         if (rc < 0)
4650                 goto io_error;
4651         else
4652                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4653
4654         /* we do not want atime to be less than mtime, it broke some apps */
4655         file_inode(file)->i_atime = current_time(file_inode(file));
4656         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4657                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4658         else
4659                 file_inode(file)->i_atime = current_time(file_inode(file));
4660
4661         if (PAGE_SIZE > rc)
4662                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4663
4664         flush_dcache_page(page);
4665         SetPageUptodate(page);
4666
4667         /* send this page to the cache */
4668         cifs_readpage_to_fscache(file_inode(file), page);
4669
4670         rc = 0;
4671
4672 io_error:
4673         kunmap(page);
4674
4675 read_complete:
4676         unlock_page(page);
4677         return rc;
4678 }
4679
4680 static int cifs_readpage(struct file *file, struct page *page)
4681 {
4682         loff_t offset = page_file_offset(page);
4683         int rc = -EACCES;
4684         unsigned int xid;
4685
4686         xid = get_xid();
4687
4688         if (file->private_data == NULL) {
4689                 rc = -EBADF;
4690                 free_xid(xid);
4691                 return rc;
4692         }
4693
4694         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4695                  page, (int)offset, (int)offset);
4696
4697         rc = cifs_readpage_worker(file, page, &offset);
4698
4699         free_xid(xid);
4700         return rc;
4701 }
4702
4703 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4704 {
4705         struct cifsFileInfo *open_file;
4706
4707         spin_lock(&cifs_inode->open_file_lock);
4708         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4709                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4710                         spin_unlock(&cifs_inode->open_file_lock);
4711                         return 1;
4712                 }
4713         }
4714         spin_unlock(&cifs_inode->open_file_lock);
4715         return 0;
4716 }
4717
4718 /* We do not want to update the file size from server for inodes
4719    open for write - to avoid races with writepage extending
4720    the file - in the future we could consider allowing
4721    refreshing the inode only on increases in the file size
4722    but this is tricky to do without racing with writebehind
4723    page caching in the current Linux kernel design */
4724 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4725 {
4726         if (!cifsInode)
4727                 return true;
4728
4729         if (is_inode_writable(cifsInode)) {
4730                 /* This inode is open for write at least once */
4731                 struct cifs_sb_info *cifs_sb;
4732
4733                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4734                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4735                         /* since no page cache to corrupt on directio
4736                         we can change size safely */
4737                         return true;
4738                 }
4739
4740                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4741                         return true;
4742
4743                 return false;
4744         } else
4745                 return true;
4746 }
4747
4748 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4749                         loff_t pos, unsigned len, unsigned flags,
4750                         struct page **pagep, void **fsdata)
4751 {
4752         int oncethru = 0;
4753         pgoff_t index = pos >> PAGE_SHIFT;
4754         loff_t offset = pos & (PAGE_SIZE - 1);
4755         loff_t page_start = pos & PAGE_MASK;
4756         loff_t i_size;
4757         struct page *page;
4758         int rc = 0;
4759
4760         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4761
4762 start:
4763         page = grab_cache_page_write_begin(mapping, index, flags);
4764         if (!page) {
4765                 rc = -ENOMEM;
4766                 goto out;
4767         }
4768
4769         if (PageUptodate(page))
4770                 goto out;
4771
4772         /*
4773          * If we write a full page it will be up to date, no need to read from
4774          * the server. If the write is short, we'll end up doing a sync write
4775          * instead.
4776          */
4777         if (len == PAGE_SIZE)
4778                 goto out;
4779
4780         /*
4781          * optimize away the read when we have an oplock, and we're not
4782          * expecting to use any of the data we'd be reading in. That
4783          * is, when the page lies beyond the EOF, or straddles the EOF
4784          * and the write will cover all of the existing data.
4785          */
4786         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4787                 i_size = i_size_read(mapping->host);
4788                 if (page_start >= i_size ||
4789                     (offset == 0 && (pos + len) >= i_size)) {
4790                         zero_user_segments(page, 0, offset,
4791                                            offset + len,
4792                                            PAGE_SIZE);
4793                         /*
4794                          * PageChecked means that the parts of the page
4795                          * to which we're not writing are considered up
4796                          * to date. Once the data is copied to the
4797                          * page, it can be set uptodate.
4798                          */
4799                         SetPageChecked(page);
4800                         goto out;
4801                 }
4802         }
4803
4804         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4805                 /*
4806                  * might as well read a page, it is fast enough. If we get
4807                  * an error, we don't need to return it. cifs_write_end will
4808                  * do a sync write instead since PG_uptodate isn't set.
4809                  */
4810                 cifs_readpage_worker(file, page, &page_start);
4811                 put_page(page);
4812                 oncethru = 1;
4813                 goto start;
4814         } else {
4815                 /* we could try using another file handle if there is one -
4816                    but how would we lock it to prevent close of that handle
4817                    racing with this read? In any case
4818                    this will be written out by write_end so is fine */
4819         }
4820 out:
4821         *pagep = page;
4822         return rc;
4823 }
4824
4825 static int cifs_release_page(struct page *page, gfp_t gfp)
4826 {
4827         if (PagePrivate(page))
4828                 return 0;
4829
4830         return cifs_fscache_release_page(page, gfp);
4831 }
4832
4833 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4834                                  unsigned int length)
4835 {
4836         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4837
4838         if (offset == 0 && length == PAGE_SIZE)
4839                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4840 }
4841
4842 static int cifs_launder_page(struct page *page)
4843 {
4844         int rc = 0;
4845         loff_t range_start = page_offset(page);
4846         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4847         struct writeback_control wbc = {
4848                 .sync_mode = WB_SYNC_ALL,
4849                 .nr_to_write = 0,
4850                 .range_start = range_start,
4851                 .range_end = range_end,
4852         };
4853
4854         cifs_dbg(FYI, "Launder page: %p\n", page);
4855
4856         if (clear_page_dirty_for_io(page))
4857                 rc = cifs_writepage_locked(page, &wbc);
4858
4859         cifs_fscache_invalidate_page(page, page->mapping->host);
4860         return rc;
4861 }
4862
4863 void cifs_oplock_break(struct work_struct *work)
4864 {
4865         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4866                                                   oplock_break);
4867         struct inode *inode = d_inode(cfile->dentry);
4868         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4869         struct cifsInodeInfo *cinode = CIFS_I(inode);
4870         struct cifs_tcon *tcon;
4871         struct TCP_Server_Info *server;
4872         struct tcon_link *tlink;
4873         int rc = 0;
4874         bool purge_cache = false, oplock_break_cancelled;
4875         __u64 persistent_fid, volatile_fid;
4876         __u16 net_fid;
4877
4878         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4879                         TASK_UNINTERRUPTIBLE);
4880
4881         tlink = cifs_sb_tlink(cifs_sb);
4882         if (IS_ERR(tlink))
4883                 goto out;
4884         tcon = tlink_tcon(tlink);
4885         server = tcon->ses->server;
4886
4887         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4888                                       cfile->oplock_epoch, &purge_cache);
4889
4890         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4891                                                 cifs_has_mand_locks(cinode)) {
4892                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4893                          inode);
4894                 cinode->oplock = 0;
4895         }
4896
4897         if (inode && S_ISREG(inode->i_mode)) {
4898                 if (CIFS_CACHE_READ(cinode))
4899                         break_lease(inode, O_RDONLY);
4900                 else
4901                         break_lease(inode, O_WRONLY);
4902                 rc = filemap_fdatawrite(inode->i_mapping);
4903                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4904                         rc = filemap_fdatawait(inode->i_mapping);
4905                         mapping_set_error(inode->i_mapping, rc);
4906                         cifs_zap_mapping(inode);
4907                 }
4908                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4909                 if (CIFS_CACHE_WRITE(cinode))
4910                         goto oplock_break_ack;
4911         }
4912
4913         rc = cifs_push_locks(cfile);
4914         if (rc)
4915                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4916
4917 oplock_break_ack:
4918         /*
4919          * When oplock break is received and there are no active
4920          * file handles but cached, then schedule deferred close immediately.
4921          * So, new open will not use cached handle.
4922          */
4923
4924         if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4925                 cifs_close_deferred_file(cinode);
4926
4927         persistent_fid = cfile->fid.persistent_fid;
4928         volatile_fid = cfile->fid.volatile_fid;
4929         net_fid = cfile->fid.netfid;
4930         oplock_break_cancelled = cfile->oplock_break_cancelled;
4931
4932         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4933         /*
4934          * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4935          * an acknowledgment to be sent when the file has already been closed.
4936          */
4937         spin_lock(&cinode->open_file_lock);
4938         /* check list empty since can race with kill_sb calling tree disconnect */
4939         if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
4940                 spin_unlock(&cinode->open_file_lock);
4941                 rc = server->ops->oplock_response(tcon, persistent_fid,
4942                                                   volatile_fid, net_fid, cinode);
4943                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4944         } else
4945                 spin_unlock(&cinode->open_file_lock);
4946
4947         cifs_put_tlink(tlink);
4948 out:
4949         cifs_done_oplock_break(cinode);
4950 }
4951
4952 /*
4953  * The presence of cifs_direct_io() in the address space ops vector
4954  * allowes open() O_DIRECT flags which would have failed otherwise.
4955  *
4956  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4957  * so this method should never be called.
4958  *
4959  * Direct IO is not yet supported in the cached mode. 
4960  */
4961 static ssize_t
4962 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4963 {
4964         /*
4965          * FIXME
4966          * Eventually need to support direct IO for non forcedirectio mounts
4967          */
4968         return -EINVAL;
4969 }
4970
4971 static int cifs_swap_activate(struct swap_info_struct *sis,
4972                               struct file *swap_file, sector_t *span)
4973 {
4974         struct cifsFileInfo *cfile = swap_file->private_data;
4975         struct inode *inode = swap_file->f_mapping->host;
4976         unsigned long blocks;
4977         long long isize;
4978
4979         cifs_dbg(FYI, "swap activate\n");
4980
4981         spin_lock(&inode->i_lock);
4982         blocks = inode->i_blocks;
4983         isize = inode->i_size;
4984         spin_unlock(&inode->i_lock);
4985         if (blocks*512 < isize) {
4986                 pr_warn("swap activate: swapfile has holes\n");
4987                 return -EINVAL;
4988         }
4989         *span = sis->pages;
4990
4991         pr_warn_once("Swap support over SMB3 is experimental\n");
4992
4993         /*
4994          * TODO: consider adding ACL (or documenting how) to prevent other
4995          * users (on this or other systems) from reading it
4996          */
4997
4998
4999         /* TODO: add sk_set_memalloc(inet) or similar */
5000
5001         if (cfile)
5002                 cfile->swapfile = true;
5003         /*
5004          * TODO: Since file already open, we can't open with DENY_ALL here
5005          * but we could add call to grab a byte range lock to prevent others
5006          * from reading or writing the file
5007          */
5008
5009         return 0;
5010 }
5011
5012 static void cifs_swap_deactivate(struct file *file)
5013 {
5014         struct cifsFileInfo *cfile = file->private_data;
5015
5016         cifs_dbg(FYI, "swap deactivate\n");
5017
5018         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5019
5020         if (cfile)
5021                 cfile->swapfile = false;
5022
5023         /* do we need to unpin (or unlock) the file */
5024 }
5025
5026 const struct address_space_operations cifs_addr_ops = {
5027         .readpage = cifs_readpage,
5028         .readpages = cifs_readpages,
5029         .writepage = cifs_writepage,
5030         .writepages = cifs_writepages,
5031         .write_begin = cifs_write_begin,
5032         .write_end = cifs_write_end,
5033         .set_page_dirty = __set_page_dirty_nobuffers,
5034         .releasepage = cifs_release_page,
5035         .direct_IO = cifs_direct_io,
5036         .invalidatepage = cifs_invalidate_page,
5037         .launder_page = cifs_launder_page,
5038         /*
5039          * TODO: investigate and if useful we could add an cifs_migratePage
5040          * helper (under an CONFIG_MIGRATION) in the future, and also
5041          * investigate and add an is_dirty_writeback helper if needed
5042          */
5043         .swap_activate = cifs_swap_activate,
5044         .swap_deactivate = cifs_swap_deactivate,
5045 };
5046
5047 /*
5048  * cifs_readpages requires the server to support a buffer large enough to
5049  * contain the header plus one complete page of data.  Otherwise, we need
5050  * to leave cifs_readpages out of the address space operations.
5051  */
5052 const struct address_space_operations cifs_addr_ops_smallbuf = {
5053         .readpage = cifs_readpage,
5054         .writepage = cifs_writepage,
5055         .writepages = cifs_writepages,
5056         .write_begin = cifs_write_begin,
5057         .write_end = cifs_write_end,
5058         .set_page_dirty = __set_page_dirty_nobuffers,
5059         .releasepage = cifs_release_page,
5060         .invalidatepage = cifs_invalidate_page,
5061         .launder_page = cifs_launder_page,
5062 };