GNU Linux-libre 5.10.217-gnu1
[releases.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50         if ((flags & O_ACCMODE) == O_RDONLY)
51                 return GENERIC_READ;
52         else if ((flags & O_ACCMODE) == O_WRONLY)
53                 return GENERIC_WRITE;
54         else if ((flags & O_ACCMODE) == O_RDWR) {
55                 /* GENERIC_ALL is too much permission to request
56                    can cause unnecessary access denied on create */
57                 /* return GENERIC_ALL; */
58                 return (GENERIC_READ | GENERIC_WRITE);
59         }
60
61         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63                 FILE_READ_DATA);
64 }
65
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68         u32 posix_flags = 0;
69
70         if ((flags & O_ACCMODE) == O_RDONLY)
71                 posix_flags = SMB_O_RDONLY;
72         else if ((flags & O_ACCMODE) == O_WRONLY)
73                 posix_flags = SMB_O_WRONLY;
74         else if ((flags & O_ACCMODE) == O_RDWR)
75                 posix_flags = SMB_O_RDWR;
76
77         if (flags & O_CREAT) {
78                 posix_flags |= SMB_O_CREAT;
79                 if (flags & O_EXCL)
80                         posix_flags |= SMB_O_EXCL;
81         } else if (flags & O_EXCL)
82                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83                          current->comm, current->tgid);
84
85         if (flags & O_TRUNC)
86                 posix_flags |= SMB_O_TRUNC;
87         /* be safe and imply O_SYNC for O_DSYNC */
88         if (flags & O_DSYNC)
89                 posix_flags |= SMB_O_SYNC;
90         if (flags & O_DIRECTORY)
91                 posix_flags |= SMB_O_DIRECTORY;
92         if (flags & O_NOFOLLOW)
93                 posix_flags |= SMB_O_NOFOLLOW;
94         if (flags & O_DIRECT)
95                 posix_flags |= SMB_O_DIRECT;
96
97         return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
113
/*
 * cifs_posix_open - open (or create) a file via the SMB POSIX extensions
 * @full_path: path of the file relative to the share root
 * @pinode: in/out inode pointer, may be NULL if the caller needs no inode
 *	    info; if *pinode is NULL a fresh inode is built from the returned
 *	    attributes, otherwise the existing inode is refreshed
 * @sb: superblock of the mount
 * @mode: create mode; masked with the current umask before sending
 * @f_flags: VFS open flags, translated to SMB_O_* wire flags
 * @poplock: out: oplock level granted by the server
 * @pnetfid: out: SMB file handle returned by the server
 * @xid: transaction id for this operation
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	/* buffer for the file attributes the server sends back */
	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* a Type of -1 means the server returned no usable attributes */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* reuse caller's inode: revalidate cached pages, apply attrs */
		cifs_revalidate_mapping(*pinode);
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
175
/*
 * cifs_nt_open - open a file using NT/SMB semantics (the non-POSIX path)
 *
 * Converts the VFS open flags into an NT desired-access mask and create
 * disposition, issues the open through server->ops->open() and then
 * refreshes the inode metadata.  On a metadata query failure the freshly
 * opened handle is closed again before returning; -ESTALE is mapped to
 * -EOPENSTALE so the VFS retries the open.
 *
 * Returns 0 on success or a negative errno.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* response buffer reused below to fill in the inode metadata */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* metadata lookup failed: drop the server handle we just got */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
265
266 static bool
267 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
268 {
269         struct cifs_fid_locks *cur;
270         bool has_locks = false;
271
272         down_read(&cinode->lock_sem);
273         list_for_each_entry(cur, &cinode->llist, llist) {
274                 if (!list_empty(&cur->locks)) {
275                         has_locks = true;
276                         break;
277                 }
278         }
279         up_read(&cinode->lock_sem);
280         return has_locks;
281 }
282
/*
 * Acquire @sem for writing by polling with trylock rather than blocking.
 *
 * NOTE(review): the msleep() poll loop presumably exists to sidestep a
 * lock-ordering/deadlock problem with lock_sem elsewhere in cifs -- confirm
 * against the commit that introduced this helper before changing it.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}
289
290 static void cifsFileInfo_put_work(struct work_struct *work);
291
/*
 * cifs_new_fileinfo - allocate and register private data for an open file
 * @fid: server file handle (and pending-open record) from the open call
 * @file: VFS file being opened; its private_data is set on success
 * @tlink: tree connection link; an extra reference is taken
 * @oplock: oplock level granted by the server
 *
 * Links the new cifsFileInfo into the per-inode and per-tcon open-file
 * lists and hands the fid/oplock to the server ops.  Returns the new
 * structure (refcount 1) or NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	/* per-handle byte-range lock list, also linked into cinode->llist */
	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;	/* initial reference, dropped by cifsFileInfo_put() */
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	/* pin the superblock while the handle exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	/* lock order: tcon->open_file_lock before cinode->open_file_lock */
	spin_lock(&tcon->open_file_lock);
	/* a lease break may have changed the oplock while the open was pending */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	/* ->set_fid() above may have flagged the page cache for purging */
	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
371
/*
 * cifsFileInfo_get - take an additional reference on file private data
 *
 * Counterpart of cifsFileInfo_put(); every get must be balanced by a put.
 * Returns @cifs_file for caller convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	/* bump the refcount under file_info_lock */
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
380
/*
 * Final teardown of a cifsFileInfo once its last reference is gone:
 * discard any remaining byte-range lock records, drop the tlink, dentry
 * and superblock references, and free the structure.  Runs either inline
 * from _cifsFileInfo_put() or deferred via cifsFileInfo_put_work().
 */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	/* unhook and free this handle's lock list from the inode */
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);	/* balances cifs_sb_active() from open */
	kfree(cifs_file);
}
407
/* Deferred final release of a cifsFileInfo, queued on fileinfo_put_wq. */
static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}
415
/**
 * cifsFileInfo_put - release a reference of file priv data
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
425
426 /**
427  * _cifsFileInfo_put - release a reference of file priv data
428  *
429  * This may involve closing the filehandle @cifs_file out on the
430  * server. Must be called without holding tcon->open_file_lock,
431  * cinode->open_file_lock and cifs_file->file_info_lock.
432  *
433  * If @wait_for_oplock_handler is true and we are releasing the last
434  * reference, wait for any running oplock break handler of the file
435  * and cancel any pending one. If calling this function from the
436  * oplock break handler, you need to pass false.
437  *
438  */
439 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
440                        bool wait_oplock_handler, bool offload)
441 {
442         struct inode *inode = d_inode(cifs_file->dentry);
443         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
444         struct TCP_Server_Info *server = tcon->ses->server;
445         struct cifsInodeInfo *cifsi = CIFS_I(inode);
446         struct super_block *sb = inode->i_sb;
447         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
448         struct cifs_fid fid;
449         struct cifs_pending_open open;
450         bool oplock_break_cancelled;
451
452         spin_lock(&tcon->open_file_lock);
453         spin_lock(&cifsi->open_file_lock);
454         spin_lock(&cifs_file->file_info_lock);
455         if (--cifs_file->count > 0) {
456                 spin_unlock(&cifs_file->file_info_lock);
457                 spin_unlock(&cifsi->open_file_lock);
458                 spin_unlock(&tcon->open_file_lock);
459                 return;
460         }
461         spin_unlock(&cifs_file->file_info_lock);
462
463         if (server->ops->get_lease_key)
464                 server->ops->get_lease_key(inode, &fid);
465
466         /* store open in pending opens to make sure we don't miss lease break */
467         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
468
469         /* remove it from the lists */
470         list_del(&cifs_file->flist);
471         list_del(&cifs_file->tlist);
472         atomic_dec(&tcon->num_local_opens);
473
474         if (list_empty(&cifsi->openFileList)) {
475                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
476                          d_inode(cifs_file->dentry));
477                 /*
478                  * In strict cache mode we need invalidate mapping on the last
479                  * close  because it may cause a error when we open this file
480                  * again and get at least level II oplock.
481                  */
482                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
483                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
484                 cifs_set_oplock_level(cifsi, 0);
485         }
486
487         spin_unlock(&cifsi->open_file_lock);
488         spin_unlock(&tcon->open_file_lock);
489
490         oplock_break_cancelled = wait_oplock_handler ?
491                 cancel_work_sync(&cifs_file->oplock_break) : false;
492
493         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
494                 struct TCP_Server_Info *server = tcon->ses->server;
495                 unsigned int xid;
496
497                 xid = get_xid();
498                 if (server->ops->close_getattr)
499                         server->ops->close_getattr(xid, tcon, cifs_file);
500                 else if (server->ops->close)
501                         server->ops->close(xid, tcon, &cifs_file->fid);
502                 _free_xid(xid);
503         }
504
505         if (oplock_break_cancelled)
506                 cifs_done_oplock_break(cifsi);
507
508         cifs_del_pending_open(&open);
509
510         if (offload)
511                 queue_work(fileinfo_put_wq, &cifs_file->put);
512         else
513                 cifsFileInfo_put_final(cifs_file);
514 }
515
/*
 * cifs_open - VFS ->open for cifs regular files
 *
 * Tries the SMB POSIX-extensions open first (when the tcon supports it),
 * otherwise falls back to the NT-semantics open via cifs_nt_open().  On
 * success allocates the per-handle cifsFileInfo and stores it in
 * file->private_data.  Returns 0 or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* on strict-cache mounts honor O_DIRECT by switching the file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	/* prefer the POSIX-extensions open when the server advertises it */
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server claims POSIX support but rejects the call;
			   disable POSIX opens for this tcon from now on */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* register a pending open so a concurrent lease break isn't missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open and the pending-open registration */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
642
643 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
644
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 *
 * Re-pushes cached locks to the server after a reconnect: POSIX locks if
 * the tcon supports the FCNTL capability (and the mount allows it),
 * otherwise mandatory locks via the server ops.  Returns 0 or a negative
 * errno from the push operation.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* nested annotation: lock_sem may already be held in this path */
	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
674
/*
 * cifs_reopen_file - re-open a file handle invalidated by a reconnect
 * @cfile: the handle to reopen (cfile->invalidHandle must be set)
 * @can_flush: true if it is safe to flush dirty pages and refresh the
 *	       inode from the server (false when called from writeback,
 *	       where flushing could deadlock)
 *
 * Serialized per handle via cfile->fh_mutex.  Tries a POSIX-extensions
 * reopen first where supported, then the NT-semantics open; on success the
 * fid/oplock is re-established and byte-range locks are re-pushed.
 * Returns 0 or a negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened the handle; nothing to do */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		/* push write-behind data so the server-side size is current */
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
839
840 int cifs_close(struct inode *inode, struct file *file)
841 {
842         if (file->private_data != NULL) {
843                 _cifsFileInfo_put(file->private_data, true, false);
844                 file->private_data = NULL;
845         }
846
847         /* return code from the ->release op is always ignored */
848         return 0;
849 }
850
851 void
852 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
853 {
854         struct cifsFileInfo *open_file;
855         struct list_head *tmp;
856         struct list_head *tmp1;
857         struct list_head tmp_list;
858
859         if (!tcon->use_persistent || !tcon->need_reopen_files)
860                 return;
861
862         tcon->need_reopen_files = false;
863
864         cifs_dbg(FYI, "Reopen persistent handles\n");
865         INIT_LIST_HEAD(&tmp_list);
866
867         /* list all files open on tree connection, reopen resilient handles  */
868         spin_lock(&tcon->open_file_lock);
869         list_for_each(tmp, &tcon->openFileList) {
870                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
871                 if (!open_file->invalidHandle)
872                         continue;
873                 cifsFileInfo_get(open_file);
874                 list_add_tail(&open_file->rlist, &tmp_list);
875         }
876         spin_unlock(&tcon->open_file_lock);
877
878         list_for_each_safe(tmp, tmp1, &tmp_list) {
879                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
880                 if (cifs_reopen_file(open_file, false /* do not flush */))
881                         tcon->need_reopen_files = true;
882                 list_del_init(&open_file->rlist);
883                 cifsFileInfo_put(open_file);
884         }
885 }
886
/*
 * Release the private search state attached to an open directory.
 * Called from ->release for directories; a failure to close the handle
 * on the server is logged and otherwise ignored.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	/* Nothing to tear down if the directory was never opened */
	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		/* mark stale before dropping the lock so nobody reuses it */
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	/* Free any SMB response buffer still held by the search state */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
937
938 static struct cifsLockInfo *
939 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
940 {
941         struct cifsLockInfo *lock =
942                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
943         if (!lock)
944                 return lock;
945         lock->offset = offset;
946         lock->length = length;
947         lock->type = type;
948         lock->pid = current->tgid;
949         lock->flags = flags;
950         INIT_LIST_HEAD(&lock->blist);
951         init_waitqueue_head(&lock->block_q);
952         return lock;
953 }
954
955 void
956 cifs_del_lock_waiters(struct cifsLockInfo *lock)
957 {
958         struct cifsLockInfo *li, *tmp;
959         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
960                 list_del_init(&li->blist);
961                 wake_up(&li->block_q);
962         }
963 }
964
/* Values for the rw_check argument of cifs_find_fid_lock_conflict() */
#define CIFS_LOCK_OP    0	/* plain byte-range lock request */
#define CIFS_READ_OP    1	/* conflict check before a read */
#define CIFS_WRITE_OP   2	/* conflict check before a write */
968
/*
 * Check whether a proposed lock on [offset, offset+length) conflicts with
 * any lock recorded against the fid described by @fdlocks.
 *
 * @rw_check : CIFS_LOCK_OP (0) - no op, CIFS_READ_OP (1) - read,
 *             CIFS_WRITE_OP (2) - write.
 *
 * Returns true and sets *conf_lock (if non-NULL) when a conflicting lock
 * is found, false otherwise.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* ranges that do not overlap can never conflict */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		/*
		 * For read/write checks, a lock held by the same thread
		 * through the same fid does not block the I/O ...
		 */
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/* two shared locks coexist if same owner+fid or same type */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* OFD locks taken through the same fid do not self-conflict */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
1005
1006 bool
1007 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1008                         __u8 type, __u16 flags,
1009                         struct cifsLockInfo **conf_lock, int rw_check)
1010 {
1011         bool rc = false;
1012         struct cifs_fid_locks *cur;
1013         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1014
1015         list_for_each_entry(cur, &cinode->llist, llist) {
1016                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1017                                                  flags, cfile, conf_lock,
1018                                                  rw_check);
1019                 if (rc)
1020                         break;
1021         }
1022
1023         return rc;
1024 }
1025
1026 /*
1027  * Check if there is another lock that prevents us to set the lock (mandatory
1028  * style). If such a lock exists, update the flock structure with its
1029  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1030  * or leave it the same if we can't. Returns 0 if we don't need to request to
1031  * the server or 1 otherwise.
1032  */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		/* report the conflicting lock's range/owner to the caller */
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		/* no local conflict, but the server must still be asked */
		rc = 1;
	else
		/* local cache is authoritative - the range is unlocked */
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
1064
/*
 * Record @lock in the list of brlocks held through @cfile. The caller is
 * expected to have checked for conflicts already (see cifs_lock_add_if()).
 */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
1073
1074 /*
1075  * Set the byte-range lock (mandatory style). Returns:
1076  * 1) 0, if we set the lock and don't need to request to the server;
1077  * 2) 1, if no locks prevent us but we need to request to the server;
1078  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1079  */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	/* no conflict and brlocks are cacheable - grant the lock locally */
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/*
		 * Queue ourselves on the conflicting lock's blist and sleep
		 * until cifs_del_lock_waiters() unhooks us (our blist node
		 * becomes a singleton again), then retry from scratch.
		 */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		/* interrupted - retake the sem to unhook from the waiters */
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
1121
1122 /*
1123  * Check if there is another lock that prevents us to set the lock (posix
1124  * style). If such a lock exists, update the flock structure with its
1125  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1126  * or leave it the same if we can't. Returns 0 if we don't need to request to
1127  * the server or 1 otherwise.
1128  */
1129 static int
1130 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1131 {
1132         int rc = 0;
1133         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1134         unsigned char saved_type = flock->fl_type;
1135
1136         if ((flock->fl_flags & FL_POSIX) == 0)
1137                 return 1;
1138
1139         down_read(&cinode->lock_sem);
1140         posix_test_lock(file, flock);
1141
1142         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1143                 flock->fl_type = saved_type;
1144                 rc = 1;
1145         }
1146
1147         up_read(&cinode->lock_sem);
1148         return rc;
1149 }
1150
1151 /*
1152  * Set the byte-range lock (posix style). Returns:
1153  * 1) <0, if the error occurs while setting the lock;
1154  * 2) 0, if we set the lock and don't need to request to the server;
1155  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1156  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1157  */
1158 static int
1159 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1160 {
1161         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1162         int rc = FILE_LOCK_DEFERRED + 1;
1163
1164         if ((flock->fl_flags & FL_POSIX) == 0)
1165                 return rc;
1166
1167         cifs_down_write(&cinode->lock_sem);
1168         if (!cinode->can_cache_brlcks) {
1169                 up_write(&cinode->lock_sem);
1170                 return rc;
1171         }
1172
1173         rc = posix_lock_file(file, flock, NULL);
1174         up_write(&cinode->lock_sem);
1175         return rc;
1176 }
1177
/*
 * Send every cached mandatory (non-posix) brlock on @cfile to the server,
 * batching as many LOCKING_ANDX_RANGE entries per request as the server's
 * maxBuf allows. Returns 0 or the last error seen from cifs_lockv().
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	/* exclusive ranges first, then shared - one request type per pass */
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/* buffer full - flush this batch to the server */
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		/* send any remaining partial batch */
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
1254
/* Derive a 32-bit wire id from the lock owner, XORed with cifs_lock_secret */
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
1260
/* A posix brlock queued for transmission to the server */
struct lock_to_push {
	struct list_head llist;	/* entry in the caller's locks_to_send list */
	__u64 offset;		/* start of the byte range */
	__u64 length;		/* length of the byte range */
	__u32 pid;		/* hashed lock owner (see hash_lockowner()) */
	__u16 netfid;		/* file handle to lock through */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1269
1270 static int
1271 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1272 {
1273         struct inode *inode = d_inode(cfile->dentry);
1274         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1275         struct file_lock *flock;
1276         struct file_lock_context *flctx = inode->i_flctx;
1277         unsigned int count = 0, i;
1278         int rc = 0, xid, type;
1279         struct list_head locks_to_send, *el;
1280         struct lock_to_push *lck, *tmp;
1281         __u64 length;
1282
1283         xid = get_xid();
1284
1285         if (!flctx)
1286                 goto out;
1287
1288         spin_lock(&flctx->flc_lock);
1289         list_for_each(el, &flctx->flc_posix) {
1290                 count++;
1291         }
1292         spin_unlock(&flctx->flc_lock);
1293
1294         INIT_LIST_HEAD(&locks_to_send);
1295
1296         /*
1297          * Allocating count locks is enough because no FL_POSIX locks can be
1298          * added to the list while we are holding cinode->lock_sem that
1299          * protects locking operations of this inode.
1300          */
1301         for (i = 0; i < count; i++) {
1302                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1303                 if (!lck) {
1304                         rc = -ENOMEM;
1305                         goto err_out;
1306                 }
1307                 list_add_tail(&lck->llist, &locks_to_send);
1308         }
1309
1310         el = locks_to_send.next;
1311         spin_lock(&flctx->flc_lock);
1312         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1313                 if (el == &locks_to_send) {
1314                         /*
1315                          * The list ended. We don't have enough allocated
1316                          * structures - something is really wrong.
1317                          */
1318                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1319                         break;
1320                 }
1321                 length = 1 + flock->fl_end - flock->fl_start;
1322                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1323                         type = CIFS_RDLCK;
1324                 else
1325                         type = CIFS_WRLCK;
1326                 lck = list_entry(el, struct lock_to_push, llist);
1327                 lck->pid = hash_lockowner(flock->fl_owner);
1328                 lck->netfid = cfile->fid.netfid;
1329                 lck->length = length;
1330                 lck->type = type;
1331                 lck->offset = flock->fl_start;
1332         }
1333         spin_unlock(&flctx->flc_lock);
1334
1335         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1336                 int stored_rc;
1337
1338                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1339                                              lck->offset, lck->length, NULL,
1340                                              lck->type, 0);
1341                 if (stored_rc)
1342                         rc = stored_rc;
1343                 list_del(&lck->llist);
1344                 kfree(lck);
1345         }
1346
1347 out:
1348         free_xid(xid);
1349         return rc;
1350 err_out:
1351         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1352                 list_del(&lck->llist);
1353                 kfree(lck);
1354         }
1355         goto out;
1356 }
1357
/*
 * Push all brlocks cached on @cfile's inode out to the server - posix
 * style if the server and mount allow it, mandatory style otherwise -
 * and stop caching further brlocks for this inode.
 */
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		/* someone else already pushed the locks - nothing to do */
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
1384
1385 static void
1386 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1387                 bool *wait_flag, struct TCP_Server_Info *server)
1388 {
1389         if (flock->fl_flags & FL_POSIX)
1390                 cifs_dbg(FYI, "Posix\n");
1391         if (flock->fl_flags & FL_FLOCK)
1392                 cifs_dbg(FYI, "Flock\n");
1393         if (flock->fl_flags & FL_SLEEP) {
1394                 cifs_dbg(FYI, "Blocking lock\n");
1395                 *wait_flag = true;
1396         }
1397         if (flock->fl_flags & FL_ACCESS)
1398                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1399         if (flock->fl_flags & FL_LEASE)
1400                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1401         if (flock->fl_flags &
1402             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1403                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1404                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1405
1406         *type = server->vals->large_lock_type;
1407         if (flock->fl_type == F_WRLCK) {
1408                 cifs_dbg(FYI, "F_WRLCK\n");
1409                 *type |= server->vals->exclusive_lock_type;
1410                 *lock = 1;
1411         } else if (flock->fl_type == F_UNLCK) {
1412                 cifs_dbg(FYI, "F_UNLCK\n");
1413                 *type |= server->vals->unlock_lock_type;
1414                 *unlock = 1;
1415                 /* Check if unlock includes more than one lock range */
1416         } else if (flock->fl_type == F_RDLCK) {
1417                 cifs_dbg(FYI, "F_RDLCK\n");
1418                 *type |= server->vals->shared_lock_type;
1419                 *lock = 1;
1420         } else if (flock->fl_type == F_EXLCK) {
1421                 cifs_dbg(FYI, "F_EXLCK\n");
1422                 *type |= server->vals->exclusive_lock_type;
1423                 *lock = 1;
1424         } else if (flock->fl_type == F_SHLCK) {
1425                 cifs_dbg(FYI, "F_SHLCK\n");
1426                 *type |= server->vals->shared_lock_type;
1427                 *lock = 1;
1428         } else
1429                 cifs_dbg(FYI, "Unknown type of lock\n");
1430 }
1431
/*
 * Handle a lock-test (F_GETLK-style) request. On return, flock describes
 * either the conflicting lock found or carries F_UNLCK if the range is
 * free. For mandatory locks this probes the server by taking and then
 * immediately releasing the lock.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* try the cached local view first; 0 means it was conclusive */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	/* probe the server: take the lock, then immediately release it */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	/* a shared-lock probe failed - an exclusive lock must be present */
	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed - retry shared to tell read from write */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1500
1501 void
1502 cifs_move_llist(struct list_head *source, struct list_head *dest)
1503 {
1504         struct list_head *li, *tmp;
1505         list_for_each_safe(li, tmp, source)
1506                 list_move(li, dest);
1507 }
1508
1509 void
1510 cifs_free_llist(struct list_head *llist)
1511 {
1512         struct cifsLockInfo *li, *tmp;
1513         list_for_each_entry_safe(li, tmp, llist, llist) {
1514                 cifs_del_lock_waiters(li);
1515                 list_del(&li->llist);
1516                 kfree(li);
1517         }
1518 }
1519
1520 int
1521 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1522                   unsigned int xid)
1523 {
1524         int rc = 0, stored_rc;
1525         static const int types[] = {
1526                 LOCKING_ANDX_LARGE_FILES,
1527                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1528         };
1529         unsigned int i;
1530         unsigned int max_num, num, max_buf;
1531         LOCKING_ANDX_RANGE *buf, *cur;
1532         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1533         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1534         struct cifsLockInfo *li, *tmp;
1535         __u64 length = 1 + flock->fl_end - flock->fl_start;
1536         struct list_head tmp_llist;
1537
1538         INIT_LIST_HEAD(&tmp_llist);
1539
1540         /*
1541          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1542          * and check it before using.
1543          */
1544         max_buf = tcon->ses->server->maxBuf;
1545         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1546                 return -EINVAL;
1547
1548         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1549                      PAGE_SIZE);
1550         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1551                         PAGE_SIZE);
1552         max_num = (max_buf - sizeof(struct smb_hdr)) /
1553                                                 sizeof(LOCKING_ANDX_RANGE);
1554         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1555         if (!buf)
1556                 return -ENOMEM;
1557
1558         cifs_down_write(&cinode->lock_sem);
1559         for (i = 0; i < 2; i++) {
1560                 cur = buf;
1561                 num = 0;
1562                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1563                         if (flock->fl_start > li->offset ||
1564                             (flock->fl_start + length) <
1565                             (li->offset + li->length))
1566                                 continue;
1567                         if (current->tgid != li->pid)
1568                                 continue;
1569                         if (types[i] != li->type)
1570                                 continue;
1571                         if (cinode->can_cache_brlcks) {
1572                                 /*
1573                                  * We can cache brlock requests - simply remove
1574                                  * a lock from the file's list.
1575                                  */
1576                                 list_del(&li->llist);
1577                                 cifs_del_lock_waiters(li);
1578                                 kfree(li);
1579                                 continue;
1580                         }
1581                         cur->Pid = cpu_to_le16(li->pid);
1582                         cur->LengthLow = cpu_to_le32((u32)li->length);
1583                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1584                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1585                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1586                         /*
1587                          * We need to save a lock here to let us add it again to
1588                          * the file's list if the unlock range request fails on
1589                          * the server.
1590                          */
1591                         list_move(&li->llist, &tmp_llist);
1592                         if (++num == max_num) {
1593                                 stored_rc = cifs_lockv(xid, tcon,
1594                                                        cfile->fid.netfid,
1595                                                        li->type, num, 0, buf);
1596                                 if (stored_rc) {
1597                                         /*
1598                                          * We failed on the unlock range
1599                                          * request - add all locks from the tmp
1600                                          * list to the head of the file's list.
1601                                          */
1602                                         cifs_move_llist(&tmp_llist,
1603                                                         &cfile->llist->locks);
1604                                         rc = stored_rc;
1605                                 } else
1606                                         /*
1607                                          * The unlock range request succeed -
1608                                          * free the tmp list.
1609                                          */
1610                                         cifs_free_llist(&tmp_llist);
1611                                 cur = buf;
1612                                 num = 0;
1613                         } else
1614                                 cur++;
1615                 }
1616                 if (num) {
1617                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1618                                                types[i], num, 0, buf);
1619                         if (stored_rc) {
1620                                 cifs_move_llist(&tmp_llist,
1621                                                 &cfile->llist->locks);
1622                                 rc = stored_rc;
1623                         } else
1624                                 cifs_free_llist(&tmp_llist);
1625                 }
1626         }
1627
1628         up_write(&cinode->lock_sem);
1629         kfree(buf);
1630         return rc;
1631 }
1632
/*
 * Apply or remove a byte-range lock for @flock on @file.
 *
 * When POSIX (unix extensions) locking is available (@posix_lck), the
 * request is satisfied locally via cifs_posix_lock_set() and, if still
 * needed, sent to the server with CIFSSMBPosixLock().  Otherwise CIFS
 * mandatory brlocks are used through the server ops.  On the FL_POSIX /
 * FL_FLOCK paths the local VFS lock state is updated at the end via
 * locks_lock_file_wait().
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		/* only rc > FILE_LOCK_DEFERRED means the request must still
		   be sent to the server; anything else is already final */
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		/* rc < 0: error; rc == 0: handled locally, nothing to send
		   to the server; rc > 0: must ask the server for the lock */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* server granted the lock - record it in the file's list */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
1726
1727 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1728 {
1729         int rc, xid;
1730         int lock = 0, unlock = 0;
1731         bool wait_flag = false;
1732         bool posix_lck = false;
1733         struct cifs_sb_info *cifs_sb;
1734         struct cifs_tcon *tcon;
1735         struct cifsFileInfo *cfile;
1736         __u32 type;
1737
1738         xid = get_xid();
1739
1740         if (!(fl->fl_flags & FL_FLOCK)) {
1741                 rc = -ENOLCK;
1742                 free_xid(xid);
1743                 return rc;
1744         }
1745
1746         cfile = (struct cifsFileInfo *)file->private_data;
1747         tcon = tlink_tcon(cfile->tlink);
1748
1749         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1750                         tcon->ses->server);
1751         cifs_sb = CIFS_FILE_SB(file);
1752
1753         if (cap_unix(tcon->ses) &&
1754             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1755             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1756                 posix_lck = true;
1757
1758         if (!lock && !unlock) {
1759                 /*
1760                  * if no lock or unlock then nothing to do since we do not
1761                  * know what it is
1762                  */
1763                 rc = -EOPNOTSUPP;
1764                 free_xid(xid);
1765                 return rc;
1766         }
1767
1768         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1769                         xid);
1770         free_xid(xid);
1771         return rc;
1772
1773
1774 }
1775
1776 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1777 {
1778         int rc, xid;
1779         int lock = 0, unlock = 0;
1780         bool wait_flag = false;
1781         bool posix_lck = false;
1782         struct cifs_sb_info *cifs_sb;
1783         struct cifs_tcon *tcon;
1784         struct cifsFileInfo *cfile;
1785         __u32 type;
1786
1787         rc = -EACCES;
1788         xid = get_xid();
1789
1790         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1791                  cmd, flock->fl_flags, flock->fl_type,
1792                  flock->fl_start, flock->fl_end);
1793
1794         cfile = (struct cifsFileInfo *)file->private_data;
1795         tcon = tlink_tcon(cfile->tlink);
1796
1797         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1798                         tcon->ses->server);
1799         cifs_sb = CIFS_FILE_SB(file);
1800
1801         if (cap_unix(tcon->ses) &&
1802             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1803             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1804                 posix_lck = true;
1805         /*
1806          * BB add code here to normalize offset and length to account for
1807          * negative length which we can not accept over the wire.
1808          */
1809         if (IS_GETLK(cmd)) {
1810                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1811                 free_xid(xid);
1812                 return rc;
1813         }
1814
1815         if (!lock && !unlock) {
1816                 /*
1817                  * if no lock or unlock then nothing to do since we do not
1818                  * know what it is
1819                  */
1820                 free_xid(xid);
1821                 return -EOPNOTSUPP;
1822         }
1823
1824         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1825                         xid);
1826         free_xid(xid);
1827         return rc;
1828 }
1829
1830 /*
1831  * update the file size (if needed) after a write. Should be called with
1832  * the inode->i_lock held
1833  */
1834 void
1835 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1836                       unsigned int bytes_written)
1837 {
1838         loff_t end_of_write = offset + bytes_written;
1839
1840         if (end_of_write > cifsi->server_eof)
1841                 cifsi->server_eof = end_of_write;
1842 }
1843
/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * file position *@offset through @open_file.  Each chunk is capped by the
 * server's wp_retry_size and retried on -EAGAIN, reopening an invalidated
 * handle first.  On success *@offset is advanced and the cached server EOF
 * and VFS inode size are pushed forward.
 *
 * Returns the number of bytes written (possibly short if a later chunk
 * failed after some data already went out) or a negative error if nothing
 * was written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* cap this request at what the server will accept
			   for a retried write */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* report a short write if some data already went
			   out, otherwise propagate the error */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1925
1926 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1927                                         bool fsuid_only)
1928 {
1929         struct cifsFileInfo *open_file = NULL;
1930         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1931
1932         /* only filter by fsuid on multiuser mounts */
1933         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1934                 fsuid_only = false;
1935
1936         spin_lock(&cifs_inode->open_file_lock);
1937         /* we could simply get the first_list_entry since write-only entries
1938            are always at the end of the list but since the first entry might
1939            have a close pending, we go through the whole list */
1940         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1941                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1942                         continue;
1943                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1944                         if (!open_file->invalidHandle) {
1945                                 /* found a good file */
1946                                 /* lock it so it will not be closed on us */
1947                                 cifsFileInfo_get(open_file);
1948                                 spin_unlock(&cifs_inode->open_file_lock);
1949                                 return open_file;
1950                         } /* else might as well continue, and look for
1951                              another, or simply have the caller reopen it
1952                              again rather than trying to fix this handle */
1953                 } else /* write only file */
1954                         break; /* write only files are last so must be done */
1955         }
1956         spin_unlock(&cifs_inode->open_file_lock);
1957         return NULL;
1958 }
1959
/*
 * Find a writable handle for @cifs_inode, preferring one opened by the
 * current tgid and honoring FIND_WR_FSUID_ONLY / FIND_WR_WITH_DELETE in
 * @flags.  If only an invalidated handle is found it is reopened, with up
 * to MAX_REOPEN_ATT retries.
 *
 * Return -EBADF if no handle is found and general rc otherwise; on
 * success *@ret_file holds a referenced handle.
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of on oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* give up after MAX_REOPEN_ATT reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* remember the first invalidated writable
				   handle as a last resort */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		/* reopen the invalidated handle outside the spinlock */
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/* reopen failed - move the handle to the list tail so other
		   entries are tried first, drop our reference and rescan */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
2050
2051 struct cifsFileInfo *
2052 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2053 {
2054         struct cifsFileInfo *cfile;
2055         int rc;
2056
2057         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2058         if (rc)
2059                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2060
2061         return cfile;
2062 }
2063
2064 int
2065 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2066                        int flags,
2067                        struct cifsFileInfo **ret_file)
2068 {
2069         struct list_head *tmp;
2070         struct cifsFileInfo *cfile;
2071         struct cifsInodeInfo *cinode;
2072         char *full_path;
2073
2074         *ret_file = NULL;
2075
2076         spin_lock(&tcon->open_file_lock);
2077         list_for_each(tmp, &tcon->openFileList) {
2078                 cfile = list_entry(tmp, struct cifsFileInfo,
2079                              tlist);
2080                 full_path = build_path_from_dentry(cfile->dentry);
2081                 if (full_path == NULL) {
2082                         spin_unlock(&tcon->open_file_lock);
2083                         return -ENOMEM;
2084                 }
2085                 if (strcmp(full_path, name)) {
2086                         kfree(full_path);
2087                         continue;
2088                 }
2089
2090                 kfree(full_path);
2091                 cinode = CIFS_I(d_inode(cfile->dentry));
2092                 spin_unlock(&tcon->open_file_lock);
2093                 return cifs_get_writable_file(cinode, flags, ret_file);
2094         }
2095
2096         spin_unlock(&tcon->open_file_lock);
2097         return -ENOENT;
2098 }
2099
2100 int
2101 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2102                        struct cifsFileInfo **ret_file)
2103 {
2104         struct list_head *tmp;
2105         struct cifsFileInfo *cfile;
2106         struct cifsInodeInfo *cinode;
2107         char *full_path;
2108
2109         *ret_file = NULL;
2110
2111         spin_lock(&tcon->open_file_lock);
2112         list_for_each(tmp, &tcon->openFileList) {
2113                 cfile = list_entry(tmp, struct cifsFileInfo,
2114                              tlist);
2115                 full_path = build_path_from_dentry(cfile->dentry);
2116                 if (full_path == NULL) {
2117                         spin_unlock(&tcon->open_file_lock);
2118                         return -ENOMEM;
2119                 }
2120                 if (strcmp(full_path, name)) {
2121                         kfree(full_path);
2122                         continue;
2123                 }
2124
2125                 kfree(full_path);
2126                 cinode = CIFS_I(d_inode(cfile->dentry));
2127                 spin_unlock(&tcon->open_file_lock);
2128                 *ret_file = find_readable_file(cinode, 0);
2129                 return *ret_file ? 0 : -ENOENT;
2130         }
2131
2132         spin_unlock(&tcon->open_file_lock);
2133         return -ENOENT;
2134 }
2135
/*
 * Write the byte range [@from, @to) of @page back to the server, clamped
 * to the current inode size, using any writable handle for the inode.
 * Returns 0 on success (or when racing with truncate made the write
 * unnecessary), a negative error otherwise.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	/* map the page; every exit path below must kunmap it */
	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		/* collapse non-retryable errors to -EIO for the caller */
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}
2193
2194 static struct cifs_writedata *
2195 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2196                           pgoff_t end, pgoff_t *index,
2197                           unsigned int *found_pages)
2198 {
2199         struct cifs_writedata *wdata;
2200
2201         wdata = cifs_writedata_alloc((unsigned int)tofind,
2202                                      cifs_writev_complete);
2203         if (!wdata)
2204                 return NULL;
2205
2206         *found_pages = find_get_pages_range_tag(mapping, index, end,
2207                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2208         return wdata;
2209 }
2210
/*
 * From the @found_pages candidates in wdata->pages, lock and claim a run
 * of consecutive dirty pages for writeback.  The scan stops at the first
 * page that is no longer eligible: truncated/invalidated, past the
 * requested range or EOF, non-consecutive, or already under writeback.
 * Claimed pages are left locked with the writeback bit set; the remaining
 * candidates are released.  Returns the number of pages claimed.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		/* block on the first page only; later pages must not stall
		   a run we have already started */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2289
2290 static int
2291 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2292                  struct address_space *mapping, struct writeback_control *wbc)
2293 {
2294         int rc;
2295
2296         wdata->sync_mode = wbc->sync_mode;
2297         wdata->nr_pages = nr_pages;
2298         wdata->offset = page_offset(wdata->pages[0]);
2299         wdata->pagesz = PAGE_SIZE;
2300         wdata->tailsz = min(i_size_read(mapping->host) -
2301                         page_offset(wdata->pages[nr_pages - 1]),
2302                         (loff_t)PAGE_SIZE);
2303         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2304         wdata->pid = wdata->cfile->pid;
2305
2306         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2307         if (rc)
2308                 return rc;
2309
2310         if (wdata->cfile->invalidHandle)
2311                 rc = -EAGAIN;
2312         else
2313                 rc = wdata->server->ops->async_writev(wdata,
2314                                                       cifs_writedata_release);
2315
2316         return rc;
2317 }
2318
2319 static int cifs_writepages(struct address_space *mapping,
2320                            struct writeback_control *wbc)
2321 {
2322         struct inode *inode = mapping->host;
2323         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2324         struct TCP_Server_Info *server;
2325         bool done = false, scanned = false, range_whole = false;
2326         pgoff_t end, index;
2327         struct cifs_writedata *wdata;
2328         struct cifsFileInfo *cfile = NULL;
2329         int rc = 0;
2330         int saved_rc = 0;
2331         unsigned int xid;
2332
2333         /*
2334          * If wsize is smaller than the page cache size, default to writing
2335          * one page at a time via cifs_writepage
2336          */
2337         if (cifs_sb->wsize < PAGE_SIZE)
2338                 return generic_writepages(mapping, wbc);
2339
2340         xid = get_xid();
2341         if (wbc->range_cyclic) {
2342                 index = mapping->writeback_index; /* Start from prev offset */
2343                 end = -1;
2344         } else {
2345                 index = wbc->range_start >> PAGE_SHIFT;
2346                 end = wbc->range_end >> PAGE_SHIFT;
2347                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2348                         range_whole = true;
2349                 scanned = true;
2350         }
2351         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2352
2353 retry:
2354         while (!done && index <= end) {
2355                 unsigned int i, nr_pages, found_pages, wsize;
2356                 pgoff_t next = 0, tofind, saved_index = index;
2357                 struct cifs_credits credits_on_stack;
2358                 struct cifs_credits *credits = &credits_on_stack;
2359                 int get_file_rc = 0;
2360
2361                 if (cfile)
2362                         cifsFileInfo_put(cfile);
2363
2364                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2365
2366                 /* in case of an error store it to return later */
2367                 if (rc)
2368                         get_file_rc = rc;
2369
2370                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2371                                                    &wsize, credits);
2372                 if (rc != 0) {
2373                         done = true;
2374                         break;
2375                 }
2376
2377                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2378
2379                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2380                                                   &found_pages);
2381                 if (!wdata) {
2382                         rc = -ENOMEM;
2383                         done = true;
2384                         add_credits_and_wake_if(server, credits, 0);
2385                         break;
2386                 }
2387
2388                 if (found_pages == 0) {
2389                         kref_put(&wdata->refcount, cifs_writedata_release);
2390                         add_credits_and_wake_if(server, credits, 0);
2391                         break;
2392                 }
2393
2394                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2395                                                end, &index, &next, &done);
2396
2397                 /* nothing to write? */
2398                 if (nr_pages == 0) {
2399                         kref_put(&wdata->refcount, cifs_writedata_release);
2400                         add_credits_and_wake_if(server, credits, 0);
2401                         continue;
2402                 }
2403
2404                 wdata->credits = credits_on_stack;
2405                 wdata->cfile = cfile;
2406                 wdata->server = server;
2407                 cfile = NULL;
2408
2409                 if (!wdata->cfile) {
2410                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2411                                  get_file_rc);
2412                         if (is_retryable_error(get_file_rc))
2413                                 rc = get_file_rc;
2414                         else
2415                                 rc = -EBADF;
2416                 } else
2417                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2418
2419                 for (i = 0; i < nr_pages; ++i)
2420                         unlock_page(wdata->pages[i]);
2421
2422                 /* send failure -- clean up the mess */
2423                 if (rc != 0) {
2424                         add_credits_and_wake_if(server, &wdata->credits, 0);
2425                         for (i = 0; i < nr_pages; ++i) {
2426                                 if (is_retryable_error(rc))
2427                                         redirty_page_for_writepage(wbc,
2428                                                            wdata->pages[i]);
2429                                 else
2430                                         SetPageError(wdata->pages[i]);
2431                                 end_page_writeback(wdata->pages[i]);
2432                                 put_page(wdata->pages[i]);
2433                         }
2434                         if (!is_retryable_error(rc))
2435                                 mapping_set_error(mapping, rc);
2436                 }
2437                 kref_put(&wdata->refcount, cifs_writedata_release);
2438
2439                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2440                         index = saved_index;
2441                         continue;
2442                 }
2443
2444                 /* Return immediately if we received a signal during writing */
2445                 if (is_interrupt_error(rc)) {
2446                         done = true;
2447                         break;
2448                 }
2449
2450                 if (rc != 0 && saved_rc == 0)
2451                         saved_rc = rc;
2452
2453                 wbc->nr_to_write -= nr_pages;
2454                 if (wbc->nr_to_write <= 0)
2455                         done = true;
2456
2457                 index = next;
2458         }
2459
2460         if (!scanned && !done) {
2461                 /*
2462                  * We hit the last page and there is more work to be done: wrap
2463                  * back to the start of the file
2464                  */
2465                 scanned = true;
2466                 index = 0;
2467                 goto retry;
2468         }
2469
2470         if (saved_rc != 0)
2471                 rc = saved_rc;
2472
2473         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2474                 mapping->writeback_index = index;
2475
2476         if (cfile)
2477                 cifsFileInfo_put(cfile);
2478         free_xid(xid);
2479         return rc;
2480 }
2481
/*
 * Write one locked, dirty page back to the server.
 *
 * Takes an extra reference on @page, moves it from "dirty" to "writeback"
 * state, and issues a synchronous partial-page write of the whole page.
 * Retryable errors either retry in-line (WB_SYNC_ALL + -EAGAIN) or redirty
 * the page; hard errors mark the page and mapping in error.  The page lock
 * itself is NOT dropped here -- the caller (cifs_writepage) does that.
 *
 * Returns the result of cifs_partialpagewrite() (bytes are not returned;
 * 0 on success, negative errno on failure).
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		/* data-integrity sync: must not give up on -EAGAIN */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	/* balance set_page_writeback() and the get_page() above */
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}
2522
/* ->writepage: write one locked page, then drop the page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int retval;

	retval = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return retval;
}
2529
/*
 * ->write_end: complete a buffered write started by ->write_begin.
 *
 * @copied bytes were copied into @page at offset @pos.  If the page is
 * fully up to date the data is simply marked dirty for later writeback;
 * otherwise (a partial write into a non-uptodate page) the data is pushed
 * to the server immediately via cifs_write() using this file's handle.
 * Extends the cached i_size under i_lock if the write grew the file.
 *
 * Returns the number of bytes accepted, or a negative errno from the
 * synchronous write path.  Drops the page lock and the page reference
 * taken by ->write_begin.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* forward the originating pid to the server when mounted with rwpidforward */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/*
	 * PageChecked is set by write_begin when it skipped reading the
	 * page in; only a full-length copy makes the page uptodate then.
	 */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		/* cached path: defer to writeback */
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	/* release the lock and reference acquired in write_begin */
	unlock_page(page);
	put_page(page);

	return rc;
}
2590
/*
 * fsync for mounts using strict cache semantics.
 *
 * Flushes dirty pages to the server, and -- unlike cifs_fsync() -- also
 * zaps the page cache when we do not hold a read (caching) lease, so the
 * next read refetches from the server.  Then asks the server to flush the
 * file unless the mount disabled server-side sync (nosssync).
 *
 * Returns 0 on success, -ENOSYS if the dialect lacks a flush op, or a
 * negative errno from writeback/flush.
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(inode->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	/* no read lease: drop cached pages so future reads go to the server */
	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush == NULL) {
			rc = -ENOSYS;
			goto strict_fsync_exit;
		}

		/*
		 * Our handle may be read-only; borrow any writable handle
		 * for the flush (reference dropped below).
		 */
		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
			if (smbfile) {
				rc = server->ops->flush(xid, tcon, &smbfile->fid);
				cifsFileInfo_put(smbfile);
			} else
				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
		} else
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
	}

strict_fsync_exit:
	free_xid(xid);
	return rc;
}
2644
/*
 * fsync for non-strict-cache mounts: flush dirty pages to the server and
 * request a server-side flush (unless mounted with nosssync).  Does not
 * invalidate the page cache -- that is the strict variant's job.
 *
 * Returns 0 on success, -ENOSYS if the dialect lacks a flush op, or a
 * negative errno from writeback/flush.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush == NULL) {
			rc = -ENOSYS;
			goto fsync_exit;
		}

		/*
		 * Our handle may be read-only; borrow any writable handle
		 * for the flush (reference dropped below).
		 */
		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
			if (smbfile) {
				rc = server->ops->flush(xid, tcon, &smbfile->fid);
				cifsFileInfo_put(smbfile);
			} else
				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
		} else
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
	}

fsync_exit:
	free_xid(xid);
	return rc;
}
2689
2690 /*
2691  * As file closes, flush all cached write data for this inode checking
2692  * for write behind errors.
2693  */
2694 int cifs_flush(struct file *file, fl_owner_t id)
2695 {
2696         struct inode *inode = file_inode(file);
2697         int rc = 0;
2698
2699         if (file->f_mode & FMODE_WRITE)
2700                 rc = filemap_write_and_wait(inode->i_mapping);
2701
2702         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2703         if (rc)
2704                 trace_cifs_flush_err(inode->i_ino, rc);
2705         return rc;
2706 }
2707
2708 static int
2709 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2710 {
2711         int rc = 0;
2712         unsigned long i;
2713
2714         for (i = 0; i < num_pages; i++) {
2715                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2716                 if (!pages[i]) {
2717                         /*
2718                          * save number of pages we have already allocated and
2719                          * return with ENOMEM error
2720                          */
2721                         num_pages = i;
2722                         rc = -ENOMEM;
2723                         break;
2724                 }
2725         }
2726
2727         if (rc) {
2728                 for (i = 0; i < num_pages; i++)
2729                         put_page(pages[i]);
2730         }
2731         return rc;
2732 }
2733
2734 static inline
2735 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2736 {
2737         size_t num_pages;
2738         size_t clen;
2739
2740         clen = min_t(const size_t, len, wsize);
2741         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2742
2743         if (cur_len)
2744                 *cur_len = clen;
2745
2746         return num_pages;
2747 }
2748
/*
 * kref release callback for uncached-write wdata: drop the aio context
 * reference, release the page references held for the I/O, then hand off
 * to the common cifs_writedata_release() to free the wdata itself.
 */
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}
2761
2762 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2763
/*
 * Work-queue completion for an uncached async write: advance the cached
 * server EOF / i_size for the bytes written, signal waiters, then kick
 * result collection.  Runs in process (workqueue) context.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/* i_size / server_eof updates must be serialized under i_lock */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2783
/*
 * Copy up to *len bytes from the user iterator @from into the pages of
 * @wdata (at most *num_pages pages).  On return, *len holds the number of
 * bytes actually copied and *num_pages the number of pages used.
 * Returns 0 on success, -EFAULT if nothing at all could be copied.
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}
2825
/*
 * Resend a previously-failed uncached write as one whole request.
 *
 * Reopens an invalidated handle, waits (polling, 1s back-off) for enough
 * credits to cover the entire wdata->bytes, then reissues the async write,
 * retrying the whole sequence as long as the failure is -EAGAIN.  On
 * success the wdata is queued on @wdata_list for collection; on any other
 * failure the wdata reference is dropped and the error returned.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				/* not enough yet; give credits back and retry */
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* re-register memory for RDMA on resend */
				if (wdata->mr) {
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}
2895
/*
 * Split an uncached (O_DIRECT or aio) write of @len bytes at @offset into
 * credit-sized chunks and issue each as an async write.
 *
 * For direct I/O the user's pages are pinned directly from @from; for the
 * buffered-uncached case pages are allocated and the data copied in.  Each
 * successfully issued wdata is queued on @wdata_list for the collector and
 * holds a reference on @ctx.  On -EAGAIN the iterator is rewound and the
 * chunk retried; any other error stops the loop and is returned (already
 * issued chunks remain on @wdata_list).
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from = *from;	/* for rewind on -EAGAIN */
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* blocks until credits for up to wsize bytes are available */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			ssize_t result;

			/* pin the user's own pages -- zero-copy path */
			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(from),
					 from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}


			/* data may start mid-page; compute the last page's length */
			wdata->page_offset = start;
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			/* buffered-uncached path: allocate pages and copy in */
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->server = server;
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		/* each in-flight wdata holds a ref on the aio ctx */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator to this chunk and retry */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3061
/*
 * Gather completions for the uncached writes issued on @ctx.
 *
 * Called both from the submitter and from each write's completion work;
 * ctx->aio_mutex serializes the collectors.  Walks ctx->list in submission
 * order, accumulating bytes written and resending -EAGAIN chunks; bails
 * out (returning, to be re-entered by a later completion) as soon as it
 * meets a wdata that has not completed yet.  When the list drains, stores
 * the final result in ctx->rc and completes the iocb or the sync waiter.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	ssize_t rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* empty list means another collector already finished the job */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&wdata->done)) {
				/* not done yet; its completion will re-enter us */
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				/* resent chunks rejoin the list; rescan from top */
				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	/* cached pages are now stale relative to what we wrote directly */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3143
/*
 * Common implementation behind cifs_user_writev() and cifs_direct_writev().
 *
 * Builds a cifs_aio_ctx describing the write, splits and issues the
 * component requests via cifs_write_from_iter(), then either returns
 * -EIOCBQUEUED (async iocb; the ctx holds the iocb and completes it
 * later) or waits for collect_uncached_write_data() to signal ctx->done.
 *
 * @direct: when true, send straight from the caller's pages without an
 *          intermediate copy; forced off for ITER_KVEC iterators.
 *
 * Returns bytes written, 0, or a negative errno.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(from)) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx owns a reference on the open file until cifs_aio_ctx_release */
	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		/* copies the user data into ctx-owned pages; sets ctx->len */
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here due to write response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: completion path finishes the iocb; drop our ref */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR but keep partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3247
3248 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3249 {
3250         struct file *file = iocb->ki_filp;
3251
3252         cifs_revalidate_mapping(file->f_inode);
3253         return __cifs_writev(iocb, from, true);
3254 }
3255
/*
 * Uncached write entry point (non-direct): data is staged into
 * ctx-owned pages by the common path before being sent.
 */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, false);
}
3260
/*
 * Write through the page cache while honouring mandatory byte-range
 * locks: refuse with -EACCES if a lock held on another fid conflicts
 * with the target range.  Called with oplock/lease allowing caching.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* honour O_SYNC/O_DSYNC semantics after locks are dropped */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
3296
/*
 * Strict-cache write entry point.  Chooses the write path based on the
 * caching state granted by the server:
 *  - write caching allowed: go through the page cache (the generic
 *    path if POSIX brlock semantics apply, else the mandatory-lock
 *    aware cifs_writev());
 *  - otherwise: send the exact range uncached, and if read caching was
 *    held, zap the now-stale cache and drop the oplock to NONE.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* blocks (or fails) if an oplock break is being handled */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause an error with mandatory locks on
	 * these pages but not on the region from pos to pos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3346
3347 static struct cifs_readdata *
3348 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3349 {
3350         struct cifs_readdata *rdata;
3351
3352         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3353         if (rdata != NULL) {
3354                 rdata->pages = pages;
3355                 kref_init(&rdata->refcount);
3356                 INIT_LIST_HEAD(&rdata->list);
3357                 init_completion(&rdata->done);
3358                 INIT_WORK(&rdata->work, complete);
3359         }
3360
3361         return rdata;
3362 }
3363
3364 static struct cifs_readdata *
3365 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3366 {
3367         struct page **pages =
3368                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3369         struct cifs_readdata *ret = NULL;
3370
3371         if (pages) {
3372                 ret = cifs_readdata_direct_alloc(pages, complete);
3373                 if (!ret)
3374                         kfree(pages);
3375         }
3376
3377         return ret;
3378 }
3379
/*
 * Final kref release for a cifs_readdata: tear down any SMB-direct
 * memory registration, drop the reference on the open file, then free
 * the page-pointer array and the readdata itself.  Note this frees
 * only the pointer array; the pages are put by the caller's release
 * path (see cifs_uncached_readdata_release()).
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kvfree(rdata->pages);
	kfree(rdata);
}
3397
3398 static int
3399 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3400 {
3401         int rc = 0;
3402         struct page *page;
3403         unsigned int i;
3404
3405         for (i = 0; i < nr_pages; i++) {
3406                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3407                 if (!page) {
3408                         rc = -ENOMEM;
3409                         break;
3410                 }
3411                 rdata->pages[i] = page;
3412         }
3413
3414         if (rc) {
3415                 unsigned int nr_page_failed = i;
3416
3417                 for (i = 0; i < nr_page_failed; i++) {
3418                         put_page(rdata->pages[i]);
3419                         rdata->pages[i] = NULL;
3420                 }
3421         }
3422         return rc;
3423 }
3424
/*
 * kref release for readdata used on the uncached (user/direct) read
 * path: drop the reference on the owning aio ctx, put every data page,
 * then free the readdata via cifs_readdata_release().
 */
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
	}
	cifs_readdata_release(refcount);
}
3438
3439 /**
3440  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3441  * @rdata:      the readdata response with list of pages holding data
3442  * @iter:       destination for our data
3443  *
3444  * This function copies data from a list of pages in a readdata response into
3445  * an array of iovecs. It will first calculate where the data should go
3446  * based on the info in the readdata and then copy the data into that spot.
3447  */
3448 static int
3449 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3450 {
3451         size_t remaining = rdata->got_bytes;
3452         unsigned int i;
3453
3454         for (i = 0; i < rdata->nr_pages; i++) {
3455                 struct page *page = rdata->pages[i];
3456                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3457                 size_t written;
3458
3459                 if (unlikely(iov_iter_is_pipe(iter))) {
3460                         void *addr = kmap_atomic(page);
3461
3462                         written = copy_to_iter(addr, copy, iter);
3463                         kunmap_atomic(addr);
3464                 } else
3465                         written = copy_page_to_iter(page, 0, copy, iter);
3466                 remaining -= written;
3467                 if (written < copy && iov_iter_count(iter) > 0)
3468                         break;
3469         }
3470         return remaining ? -EFAULT : 0;
3471 }
3472
static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);

/*
 * Work item run when a response for one uncached read has been
 * processed: mark this rdata complete, then try to collect the overall
 * result for its aio ctx (the collector returns early if siblings are
 * still outstanding).
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3486
3487 static int
3488 uncached_fill_pages(struct TCP_Server_Info *server,
3489                     struct cifs_readdata *rdata, struct iov_iter *iter,
3490                     unsigned int len)
3491 {
3492         int result = 0;
3493         unsigned int i;
3494         unsigned int nr_pages = rdata->nr_pages;
3495         unsigned int page_offset = rdata->page_offset;
3496
3497         rdata->got_bytes = 0;
3498         rdata->tailsz = PAGE_SIZE;
3499         for (i = 0; i < nr_pages; i++) {
3500                 struct page *page = rdata->pages[i];
3501                 size_t n;
3502                 unsigned int segment_size = rdata->pagesz;
3503
3504                 if (i == 0)
3505                         segment_size -= page_offset;
3506                 else
3507                         page_offset = 0;
3508
3509
3510                 if (len <= 0) {
3511                         /* no need to hold page hostage */
3512                         rdata->pages[i] = NULL;
3513                         rdata->nr_pages--;
3514                         put_page(page);
3515                         continue;
3516                 }
3517
3518                 n = len;
3519                 if (len >= segment_size)
3520                         /* enough data to fill the page */
3521                         n = segment_size;
3522                 else
3523                         rdata->tailsz = len;
3524                 len -= n;
3525
3526                 if (iter)
3527                         result = copy_page_from_iter(
3528                                         page, page_offset, n, iter);
3529 #ifdef CONFIG_CIFS_SMB_DIRECT
3530                 else if (rdata->mr)
3531                         result = n;
3532 #endif
3533                 else
3534                         result = cifs_read_page_from_socket(
3535                                         server, page, page_offset, n);
3536                 if (result < 0)
3537                         break;
3538
3539                 rdata->got_bytes += result;
3540         }
3541
3542         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
3543                                                 rdata->got_bytes : result;
3544 }
3545
/* Fill the rdata pages straight from the server socket. */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}
3552
/*
 * Fill the rdata pages from a caller-provided iov_iter instead of the
 * socket (used when the payload was already received elsewhere).
 */
static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}
3560
/*
 * Resend a previously failed uncached read, reusing the same rdata.
 *
 * Reopens an invalidated handle if needed, then polls (1s sleeps)
 * until the server grants enough MTU credits to cover the whole
 * rdata->bytes in one request before reissuing async_readv().  On
 * success the rdata is queued on @rdata_list; on terminal failure the
 * caller's reference is dropped here and the error returned.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			if (rsize < rdata->bytes) {
				/* not enough yet: give them back and retry */
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* drop the stale MR; a new one is set up on send */
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}
3632
/*
 * Split an uncached/direct read of [offset, offset+len) into
 * rsize-bounded cifs_readdata requests and issue them via
 * server->ops->async_readv().
 *
 * Direct I/O: the caller's pages are pinned with
 * iov_iter_get_pages_alloc() and used as the receive buffers.
 * Buffered (user) path: anonymous pages are allocated per request and
 * copied out to the iterator later by the collector.
 *
 * Each successfully sent rdata is queued on @rdata_list and holds a
 * reference on @ctx.  Returns 0 or the first fatal error.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* on a resend, @offset may start past ctx->pos; skip what's done */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(&direct_iov),
					 direct_iov.iov_offset,
					 direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			/* may have pinned fewer bytes than requested */
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			/* tailsz = bytes landing in the final pinned page */
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		/* rdata holds a ctx ref until cifs_uncached_readdata_release */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* undo the advance so the retry re-pins */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3774
/*
 * Gather the results of all rdatas belonging to @ctx.
 *
 * Called from both the issuing path and each completion work item;
 * whichever caller finds every rdata completed finishes the aio.
 * Retryable (-EAGAIN) rdatas are resent (reusing the rdata for direct
 * I/O, re-splitting via cifs_send_async_read() otherwise); completed
 * data is copied to the user iterator in the non-direct case.  If any
 * rdata is still pending, the function returns early leaving the ctx
 * for a later caller.  Finally ctx->rc is set and the iocb completed
 * (or ctx->done signalled for synchronous callers).
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* an earlier caller already collected everything */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* non-direct: the iterator's consumed count is authoritative */
	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3879
/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv().
 *
 * Builds a cifs_aio_ctx describing the read, issues the component
 * requests via cifs_send_async_read(), then either returns
 * -EIOCBQUEUED (async iocb; completed later by the collector) or waits
 * for collect_uncached_read_data() to signal ctx->done.
 *
 * @direct: when true, read straight into the caller's pinned pages;
 *          forced off for ITER_KVEC iterators.
 *
 * Returns bytes read, 0, or a negative errno.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx owns a reference on the open file until cifs_aio_ctx_release */
	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		/* allocates ctx-owned pages to receive into; sets ctx->len */
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	if (direct) {
		/* flush cached dirty data so the direct read sees it */
		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
						  offset, offset + len - 1);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -EAGAIN;
		}
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: completion path finishes the iocb; drop our ref */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR but keep partial count */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3991
/*
 * O_DIRECT read entry point: read straight into the caller's buffers,
 * bypassing the page cache, via __cifs_readv(..., direct = true).
 */
ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}
3996
/*
 * Uncached (but not O_DIRECT) read entry point: data is staged through
 * pages allocated by the aio context, via __cifs_readv(..., direct = false).
 */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}
4001
/*
 * Strict-cache read: use the page cache only when we hold a read (level II)
 * oplock and no conflicting mandatory byte-range lock exists; otherwise go
 * to the server.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/*
	 * With Unix extensions and POSIX brlocks enabled, POSIX lock
	 * semantics apply and a cached read needs no mandatory-lock check.
	 */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
4041
/*
 * Synchronous, uncached read.  Reads up to @read_size bytes at *@offset
 * into @read_data using the server's sync_read op, advancing *@offset as
 * data arrives.  Returns the total number of bytes read, or a negative
 * errno if nothing could be read at all.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms = {0};
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = cifs_pick_channel(tcon->ses);

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* forward the opener's pid when the mount requests it (locking) */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	/* issue reads of at most rsize bytes until the request is satisfied */
	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* retry this chunk as long as the server returns -EAGAIN */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			/* reopen the handle if reconnect invalidated it */
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			io_parms.server = server;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* error or EOF: return progress so far, else the error */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
4133
4134 /*
4135  * If the page is mmap'ed into a process' page tables, then we need to make
4136  * sure that it doesn't change while being written back.
4137  */
4138 static vm_fault_t
4139 cifs_page_mkwrite(struct vm_fault *vmf)
4140 {
4141         struct page *page = vmf->page;
4142
4143         lock_page(page);
4144         return VM_FAULT_LOCKED;
4145 }
4146
/*
 * vm_ops for cifs mmap'ed regions: generic fault handling, with
 * page_mkwrite hooked so a mapped page stays locked while written back.
 */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
4152
4153 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4154 {
4155         int xid, rc = 0;
4156         struct inode *inode = file_inode(file);
4157
4158         xid = get_xid();
4159
4160         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4161                 rc = cifs_zap_mapping(inode);
4162         if (!rc)
4163                 rc = generic_file_mmap(file, vma);
4164         if (!rc)
4165                 vma->vm_ops = &cifs_file_vm_ops;
4166
4167         free_xid(xid);
4168         return rc;
4169 }
4170
4171 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4172 {
4173         int rc, xid;
4174
4175         xid = get_xid();
4176
4177         rc = cifs_revalidate_file(file);
4178         if (rc)
4179                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4180                          rc);
4181         if (!rc)
4182                 rc = generic_file_mmap(file, vma);
4183         if (!rc)
4184                 vma->vm_ops = &cifs_file_vm_ops;
4185
4186         free_xid(xid);
4187         return rc;
4188 }
4189
/*
 * Completion work for an async readpages request: mark the pages that
 * received data up to date, return them to the LRU, push good pages to
 * fscache, and drop the readdata reference taken for this work item.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i, got_bytes;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	got_bytes = rdata->got_bytes;
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add(page);

		/*
		 * A page is uptodate if the whole read succeeded, or if it
		 * got some data before a reconnect (-EAGAIN) cut it short.
		 */
		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		/* cache successfully read pages (after unlocking them) */
		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		/* consume this page's share of the received byte count */
		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);

		put_page(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
4222
/*
 * Fill the pages attached to @rdata with up to @len bytes of response
 * data, either copied from @iter (when non-NULL) or read directly from
 * the socket.  Pages wholly beyond @len are zeroed (past the server EOF)
 * or released rather than read.  Returns the number of bytes placed into
 * the pages, or a negative errno.
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page may start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			/* data was already placed by RDMA; just account it */
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* report partial progress unless the connection was torn down */
	return result != -ECONNABORTED && rdata->got_bytes > 0 ?
						rdata->got_bytes : result;
}
4308
/* read_into_pages callback: pull response data straight off the socket */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
4315
/* copy_into_pages callback: drain the whole iterator into rdata's pages */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iov_iter_count(iter));
}
4323
/*
 * Pull a run of index-contiguous pages off @page_list into @tmplist,
 * inserting each into the page cache, and stop at the first
 * discontinuity, at the @rsize limit, or on a cache-insertion failure.
 * On return *offset/*bytes/*nr_pages describe the contiguous request
 * that was assembled.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4383
/*
 * ->readpages() implementation: batch contiguous pages from @page_list
 * into rsize-bounded async read requests.  Pages that cannot be issued
 * are returned to the LRU so the VFS can fall back to ->readpage().
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		/* every page was satisfied from the cache */
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* wait until the server grants credits for up to rsize bytes */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			/* undo: release credits and return pages to the VFS */
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our reference; the completion work holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4538
4539 /*
4540  * cifs_readpage_worker must be called with the page pinned
4541  */
4542 static int cifs_readpage_worker(struct file *file, struct page *page,
4543         loff_t *poffset)
4544 {
4545         char *read_data;
4546         int rc;
4547
4548         /* Is the page cached? */
4549         rc = cifs_readpage_from_fscache(file_inode(file), page);
4550         if (rc == 0)
4551                 goto read_complete;
4552
4553         read_data = kmap(page);
4554         /* for reads over a certain size could initiate async read ahead */
4555
4556         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4557
4558         if (rc < 0)
4559                 goto io_error;
4560         else
4561                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4562
4563         /* we do not want atime to be less than mtime, it broke some apps */
4564         file_inode(file)->i_atime = current_time(file_inode(file));
4565         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4566                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4567         else
4568                 file_inode(file)->i_atime = current_time(file_inode(file));
4569
4570         if (PAGE_SIZE > rc)
4571                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4572
4573         flush_dcache_page(page);
4574         SetPageUptodate(page);
4575
4576         /* send this page to the cache */
4577         cifs_readpage_to_fscache(file_inode(file), page);
4578
4579         rc = 0;
4580
4581 io_error:
4582         kunmap(page);
4583
4584 read_complete:
4585         unlock_page(page);
4586         return rc;
4587 }
4588
4589 static int cifs_readpage(struct file *file, struct page *page)
4590 {
4591         loff_t offset = page_file_offset(page);
4592         int rc = -EACCES;
4593         unsigned int xid;
4594
4595         xid = get_xid();
4596
4597         if (file->private_data == NULL) {
4598                 rc = -EBADF;
4599                 free_xid(xid);
4600                 return rc;
4601         }
4602
4603         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4604                  page, (int)offset, (int)offset);
4605
4606         rc = cifs_readpage_worker(file, page, &offset);
4607
4608         free_xid(xid);
4609         return rc;
4610 }
4611
4612 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4613 {
4614         struct cifsFileInfo *open_file;
4615
4616         spin_lock(&cifs_inode->open_file_lock);
4617         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4618                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4619                         spin_unlock(&cifs_inode->open_file_lock);
4620                         return 1;
4621                 }
4622         }
4623         spin_unlock(&cifs_inode->open_file_lock);
4624         return 0;
4625 }
4626
4627 /* We do not want to update the file size from server for inodes
4628    open for write - to avoid races with writepage extending
4629    the file - in the future we could consider allowing
4630    refreshing the inode only on increases in the file size
4631    but this is tricky to do without racing with writebehind
4632    page caching in the current Linux kernel design */
4633 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4634 {
4635         if (!cifsInode)
4636                 return true;
4637
4638         if (is_inode_writable(cifsInode)) {
4639                 /* This inode is open for write at least once */
4640                 struct cifs_sb_info *cifs_sb;
4641
4642                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4643                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4644                         /* since no page cache to corrupt on directio
4645                         we can change size safely */
4646                         return true;
4647                 }
4648
4649                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4650                         return true;
4651
4652                 return false;
4653         } else
4654                 return true;
4655 }
4656
/*
 * Prepare a locked pagecache page for a buffered write of @len bytes at
 * @pos.  Avoids reading from the server when the write makes the read
 * unnecessary (a full-page write, or - with a read oplock - a write that
 * covers all of the page's existing data).  *pagep is set to the locked
 * page; cifs_write_end() completes the operation.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;	/* nonzero once we have tried reading the page in */
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4733
4734 static int cifs_release_page(struct page *page, gfp_t gfp)
4735 {
4736         if (PagePrivate(page))
4737                 return 0;
4738
4739         return cifs_fscache_release_page(page, gfp);
4740 }
4741
4742 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4743                                  unsigned int length)
4744 {
4745         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4746
4747         if (offset == 0 && length == PAGE_SIZE)
4748                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4749 }
4750
4751 static int cifs_launder_page(struct page *page)
4752 {
4753         int rc = 0;
4754         loff_t range_start = page_offset(page);
4755         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4756         struct writeback_control wbc = {
4757                 .sync_mode = WB_SYNC_ALL,
4758                 .nr_to_write = 0,
4759                 .range_start = range_start,
4760                 .range_end = range_end,
4761         };
4762
4763         cifs_dbg(FYI, "Launder page: %p\n", page);
4764
4765         if (clear_page_dirty_for_io(page))
4766                 rc = cifs_writepage_locked(page, &wbc);
4767
4768         cifs_fscache_invalidate_page(page, page->mapping->host);
4769         return rc;
4770 }
4771
/*
 * cifs_oplock_break - handle a server-initiated oplock break
 * @work: the oplock_break work_struct embedded in a cifsFileInfo
 *
 * Runs from a workqueue after the server revokes (part of) our cached
 * oplock/lease. Downgrades the locally cached oplock state, flushes
 * and/or invalidates cached pages as required, pushes cached byte-range
 * locks to the server when write caching is lost, and finally sends the
 * oplock acknowledgment unless the break was cancelled.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* don't change oplock state until in-flight writers have drained */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	/* protocol-specific downgrade; may request a full cache purge */
	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	/* with mandatory locks held, drop the oplock to None entirely */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* propagate the break to any local leases on this inode */
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* read caching lost: wait for writeback, then zap */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/* still write-caching: no need to push byte-range locks */
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	/* write caching lost: hand cached byte-range locks to the server */
	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
4831
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with O_DIRECT, which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests before they reach this layer, so this method
 * should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
4850
4851 static int cifs_swap_activate(struct swap_info_struct *sis,
4852                               struct file *swap_file, sector_t *span)
4853 {
4854         struct cifsFileInfo *cfile = swap_file->private_data;
4855         struct inode *inode = swap_file->f_mapping->host;
4856         unsigned long blocks;
4857         long long isize;
4858
4859         cifs_dbg(FYI, "swap activate\n");
4860
4861         spin_lock(&inode->i_lock);
4862         blocks = inode->i_blocks;
4863         isize = inode->i_size;
4864         spin_unlock(&inode->i_lock);
4865         if (blocks*512 < isize) {
4866                 pr_warn("swap activate: swapfile has holes\n");
4867                 return -EINVAL;
4868         }
4869         *span = sis->pages;
4870
4871         pr_warn_once("Swap support over SMB3 is experimental\n");
4872
4873         /*
4874          * TODO: consider adding ACL (or documenting how) to prevent other
4875          * users (on this or other systems) from reading it
4876          */
4877
4878
4879         /* TODO: add sk_set_memalloc(inet) or similar */
4880
4881         if (cfile)
4882                 cfile->swapfile = true;
4883         /*
4884          * TODO: Since file already open, we can't open with DENY_ALL here
4885          * but we could add call to grab a byte range lock to prevent others
4886          * from reading or writing the file
4887          */
4888
4889         return 0;
4890 }
4891
/* Undo cifs_swap_activate() when the swapfile is deactivated */
static void cifs_swap_deactivate(struct file *file)
{
	struct cifsFileInfo *cfile = file->private_data;

	cifs_dbg(FYI, "swap deactivate\n");

	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */

	/* clear the flag set by cifs_swap_activate() */
	if (cfile)
		cfile->swapfile = false;

	/* do we need to unpin (or unlock) the file */
}
4905
/*
 * Default address space operations, used when the server supports
 * buffers large enough for multi-page reads (see cifs_addr_ops_smallbuf
 * below for the alternative).
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/*
	 * TODO: investigate and if useful we could add an cifs_migratePage
	 * helper (under an CONFIG_MIGRATION) in the future, and also
	 * investigate and add an is_dirty_writeback helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};
4926
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
	/* no .readpages, .direct_IO, or swap ops for small-buffer servers */
};