GNU Linux-libre 5.4.257-gnu1
[releases.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50         if ((flags & O_ACCMODE) == O_RDONLY)
51                 return GENERIC_READ;
52         else if ((flags & O_ACCMODE) == O_WRONLY)
53                 return GENERIC_WRITE;
54         else if ((flags & O_ACCMODE) == O_RDWR) {
55                 /* GENERIC_ALL is too much permission to request
56                    can cause unnecessary access denied on create */
57                 /* return GENERIC_ALL; */
58                 return (GENERIC_READ | GENERIC_WRITE);
59         }
60
61         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63                 FILE_READ_DATA);
64 }
65
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68         u32 posix_flags = 0;
69
70         if ((flags & O_ACCMODE) == O_RDONLY)
71                 posix_flags = SMB_O_RDONLY;
72         else if ((flags & O_ACCMODE) == O_WRONLY)
73                 posix_flags = SMB_O_WRONLY;
74         else if ((flags & O_ACCMODE) == O_RDWR)
75                 posix_flags = SMB_O_RDWR;
76
77         if (flags & O_CREAT) {
78                 posix_flags |= SMB_O_CREAT;
79                 if (flags & O_EXCL)
80                         posix_flags |= SMB_O_EXCL;
81         } else if (flags & O_EXCL)
82                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83                          current->comm, current->tgid);
84
85         if (flags & O_TRUNC)
86                 posix_flags |= SMB_O_TRUNC;
87         /* be safe and imply O_SYNC for O_DSYNC */
88         if (flags & O_DSYNC)
89                 posix_flags |= SMB_O_SYNC;
90         if (flags & O_DIRECTORY)
91                 posix_flags |= SMB_O_DIRECTORY;
92         if (flags & O_NOFOLLOW)
93                 posix_flags |= SMB_O_NOFOLLOW;
94         if (flags & O_DIRECT)
95                 posix_flags |= SMB_O_DIRECT;
96
97         return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
113
/*
 * Open @full_path using the legacy CIFS POSIX-extensions create call.
 *
 * On success the server-assigned oplock and netfid are returned through
 * @poplock and @pnetfid.  If @pinode is non-NULL, the FILE_UNIX_BASIC_INFO
 * from the reply is used either to look up / create the inode
 * (*pinode == NULL) or to refresh the existing one.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the caller's umask before sending the mode to the server */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type == -1 means the server sent no usable file metadata back */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/*
		 * NOTE(review): the return value of cifs_revalidate_mapping()
		 * is ignored here -- presumably a best-effort refresh; confirm
		 * before relying on the mapping being valid after this call.
		 */
		cifs_revalidate_mapping(*pinode);
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
175
/*
 * Open @full_path with a regular NT-style create (no POSIX extensions)
 * via the server's ->open op, then refresh the inode metadata from the
 * returned FILE_ALL_INFO (or the unix path-info call when unix
 * extensions are active).  If the metadata fetch fails, the handle that
 * was just opened is closed again; -ESTALE is mapped to -EOPENSTALE so
 * the VFS open path can retry.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* scratch buffer for the server's file info reply */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* refresh local inode metadata from the open reply / server */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* don't leak the server handle if we can't set up the inode */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
264
265 static bool
266 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
267 {
268         struct cifs_fid_locks *cur;
269         bool has_locks = false;
270
271         down_read(&cinode->lock_sem);
272         list_for_each_entry(cur, &cinode->llist, llist) {
273                 if (!list_empty(&cur->locks)) {
274                         has_locks = true;
275                         break;
276                 }
277         }
278         up_read(&cinode->lock_sem);
279         return has_locks;
280 }
281
/*
 * Acquire @sem for write by polling down_write_trylock().
 *
 * NOTE(review): this looks like a deliberate workaround for a lock
 * ordering / rwsem writer-queueing issue with cinode->lock_sem rather
 * than an optimization -- confirm against the commit that introduced it
 * before replacing with a plain down_write().
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}
288
289 static void cifsFileInfo_put_work(struct work_struct *work);
290
/*
 * Allocate and initialize the cifsFileInfo for a freshly opened server
 * handle, link it into the per-tcon and per-inode open-file lists, and
 * attach it to file->private_data.  The pending open referenced through
 * @fid is consumed (removed from its list) here.
 *
 * Returns the new cifsFileInfo (refcount 1) or NULL on allocation
 * failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* per-fid byte-range lock list, cross-linked with the fileinfo */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	/* pin the superblock while this handle exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	/* a lease break may have updated the pending open's oplock level */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	/* ops->set_fid() may set purge_cache; the zap is deferred until
	 * after the spinlocks are dropped below */
	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
370
/*
 * Take an additional reference on @cifs_file under file_info_lock.
 * The caller must already hold at least one reference.  Returns
 * @cifs_file for call-chaining convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
379
/*
 * Final teardown of a cifsFileInfo once its refcount has hit zero and
 * the server handle has been dealt with: discard cached byte-range
 * locks, drop the tlink/dentry/superblock references taken at open, and
 * free the structure.  Runs either inline or from the put workqueue.
 */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	/* release the references taken in cifs_new_fileinfo() */
	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
406
/*
 * Workqueue trampoline for the offloaded (offload == true) variant of
 * _cifsFileInfo_put(): recover the cifsFileInfo from its embedded work
 * struct and run the final teardown.
 */
static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}
414
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler (wait_oplock_handler ==
 * true) and never offload the final teardown to the workqueue.  See
 * _cifsFileInfo_put() for the full semantics.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
424
425 /**
426  * _cifsFileInfo_put - release a reference of file priv data
427  *
428  * This may involve closing the filehandle @cifs_file out on the
429  * server. Must be called without holding tcon->open_file_lock,
430  * cinode->open_file_lock and cifs_file->file_info_lock.
431  *
432  * If @wait_for_oplock_handler is true and we are releasing the last
433  * reference, wait for any running oplock break handler of the file
434  * and cancel any pending one. If calling this function from the
435  * oplock break handler, you need to pass false.
436  *
437  */
438 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
439                        bool wait_oplock_handler, bool offload)
440 {
441         struct inode *inode = d_inode(cifs_file->dentry);
442         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
443         struct TCP_Server_Info *server = tcon->ses->server;
444         struct cifsInodeInfo *cifsi = CIFS_I(inode);
445         struct super_block *sb = inode->i_sb;
446         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
447         struct cifs_fid fid;
448         struct cifs_pending_open open;
449         bool oplock_break_cancelled;
450
451         spin_lock(&tcon->open_file_lock);
452         spin_lock(&cifsi->open_file_lock);
453         spin_lock(&cifs_file->file_info_lock);
454         if (--cifs_file->count > 0) {
455                 spin_unlock(&cifs_file->file_info_lock);
456                 spin_unlock(&cifsi->open_file_lock);
457                 spin_unlock(&tcon->open_file_lock);
458                 return;
459         }
460         spin_unlock(&cifs_file->file_info_lock);
461
462         if (server->ops->get_lease_key)
463                 server->ops->get_lease_key(inode, &fid);
464
465         /* store open in pending opens to make sure we don't miss lease break */
466         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
467
468         /* remove it from the lists */
469         list_del(&cifs_file->flist);
470         list_del(&cifs_file->tlist);
471         atomic_dec(&tcon->num_local_opens);
472
473         if (list_empty(&cifsi->openFileList)) {
474                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
475                          d_inode(cifs_file->dentry));
476                 /*
477                  * In strict cache mode we need invalidate mapping on the last
478                  * close  because it may cause a error when we open this file
479                  * again and get at least level II oplock.
480                  */
481                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
482                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
483                 cifs_set_oplock_level(cifsi, 0);
484         }
485
486         spin_unlock(&cifsi->open_file_lock);
487         spin_unlock(&tcon->open_file_lock);
488
489         oplock_break_cancelled = wait_oplock_handler ?
490                 cancel_work_sync(&cifs_file->oplock_break) : false;
491
492         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
493                 struct TCP_Server_Info *server = tcon->ses->server;
494                 unsigned int xid;
495
496                 xid = get_xid();
497                 if (server->ops->close)
498                         server->ops->close(xid, tcon, &cifs_file->fid);
499                 _free_xid(xid);
500         }
501
502         if (oplock_break_cancelled)
503                 cifs_done_oplock_break(cifsi);
504
505         cifs_del_pending_open(&open);
506
507         if (offload)
508                 queue_work(fileinfo_put_wq, &cifs_file->put);
509         else
510                 cifsFileInfo_put_final(cifs_file);
511 }
512
/*
 * ->open() for cifs regular files.
 *
 * Attempts the CIFS POSIX-extensions open first when the tcon
 * advertises support, falling back to a regular NT-style open via
 * cifs_nt_open().  On success a cifsFileInfo is created and attached to
 * file->private_data.  Returns 0 or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* O_DIRECT on a strict-cache mount switches to the uncached f_ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server rejected posix open: don't try it again */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* record a pending open so a lease break can't be missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open; no one holds the handle now */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
639
640 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
641
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 *
 * If locks are still cached locally (can_cache_brlcks) nothing needs to
 * be pushed.  Otherwise re-send either POSIX locks (when the unix
 * extensions support fcntl locks and the mount doesn't disable them) or
 * mandatory locks via the server ops.  Returns 0 or a negative errno.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* nested: reopen path may already hold another lock_sem class */
	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
671
/*
 * Re-open a file whose server handle was invalidated (e.g. after a
 * reconnect).
 *
 * @can_flush: when true it is safe to flush dirty pages and refresh the
 * inode metadata from the server; writeback-path callers pass false to
 * avoid deadlocking on their own in-flight data (see comment below).
 *
 * Returns 0 on success (handle valid again, locks re-pushed if needed)
 * or a negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened it under fh_mutex */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
834
835 int cifs_close(struct inode *inode, struct file *file)
836 {
837         if (file->private_data != NULL) {
838                 _cifsFileInfo_put(file->private_data, true, false);
839                 file->private_data = NULL;
840         }
841
842         /* return code from the ->release op is always ignored */
843         return 0;
844 }
845
846 void
847 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
848 {
849         struct cifsFileInfo *open_file;
850         struct list_head *tmp;
851         struct list_head *tmp1;
852         struct list_head tmp_list;
853
854         if (!tcon->use_persistent || !tcon->need_reopen_files)
855                 return;
856
857         tcon->need_reopen_files = false;
858
859         cifs_dbg(FYI, "Reopen persistent handles");
860         INIT_LIST_HEAD(&tmp_list);
861
862         /* list all files open on tree connection, reopen resilient handles  */
863         spin_lock(&tcon->open_file_lock);
864         list_for_each(tmp, &tcon->openFileList) {
865                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
866                 if (!open_file->invalidHandle)
867                         continue;
868                 cifsFileInfo_get(open_file);
869                 list_add_tail(&open_file->rlist, &tmp_list);
870         }
871         spin_unlock(&tcon->open_file_lock);
872
873         list_for_each_safe(tmp, tmp1, &tmp_list) {
874                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
875                 if (cifs_reopen_file(open_file, false /* do not flush */))
876                         tcon->need_reopen_files = true;
877                 list_del_init(&open_file->rlist);
878                 cifsFileInfo_put(open_file);
879         }
880 }
881
/*
 * VFS ->release for directories: close the server-side search handle if the
 * readdir was left uncompleted, release any cached search response buffer,
 * and free the per-file private data.  Failures are logged but never
 * propagated (the ->release return code is ignored by the VFS anyway).
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	/* nothing was attached to this file - nothing to tear down */
	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		/* mark the handle stale before dropping the lock, then close */
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	/* release an SMB response buffer still held by the search state */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
932
933 static struct cifsLockInfo *
934 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
935 {
936         struct cifsLockInfo *lock =
937                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
938         if (!lock)
939                 return lock;
940         lock->offset = offset;
941         lock->length = length;
942         lock->type = type;
943         lock->pid = current->tgid;
944         lock->flags = flags;
945         INIT_LIST_HEAD(&lock->blist);
946         init_waitqueue_head(&lock->block_q);
947         return lock;
948 }
949
950 void
951 cifs_del_lock_waiters(struct cifsLockInfo *lock)
952 {
953         struct cifsLockInfo *li, *tmp;
954         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
955                 list_del_init(&li->blist);
956                 wake_up(&li->block_q);
957         }
958 }
959
960 #define CIFS_LOCK_OP    0
961 #define CIFS_READ_OP    1
962 #define CIFS_WRITE_OP   2
963
/* @rw_check : CIFS_LOCK_OP - lock op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
/*
 * Scan one fid's list of cached byte-range locks for one that conflicts
 * with [offset, offset + length).  On conflict, returns true and stores
 * the conflicting lock through @conf_lock (when non-NULL).
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* skip locks whose range does not overlap the requested one */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		/* our own lock on the same fid is not a conflict for I/O ... */
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/* shared locks coexist with same-owner or same-type locks */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* OFD locks taken through the same fid do not conflict */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
1000
1001 bool
1002 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1003                         __u8 type, __u16 flags,
1004                         struct cifsLockInfo **conf_lock, int rw_check)
1005 {
1006         bool rc = false;
1007         struct cifs_fid_locks *cur;
1008         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1009
1010         list_for_each_entry(cur, &cinode->llist, llist) {
1011                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1012                                                  flags, cfile, conf_lock,
1013                                                  rw_check);
1014                 if (rc)
1015                         break;
1016         }
1017
1018         return rc;
1019 }
1020
1021 /*
1022  * Check if there is another lock that prevents us to set the lock (mandatory
1023  * style). If such a lock exists, update the flock structure with its
1024  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1025  * or leave it the same if we can't. Returns 0 if we don't need to request to
1026  * the server or 1 otherwise.
1027  */
1028 static int
1029 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1030                __u8 type, struct file_lock *flock)
1031 {
1032         int rc = 0;
1033         struct cifsLockInfo *conf_lock;
1034         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1035         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1036         bool exist;
1037
1038         down_read(&cinode->lock_sem);
1039
1040         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1041                                         flock->fl_flags, &conf_lock,
1042                                         CIFS_LOCK_OP);
1043         if (exist) {
1044                 flock->fl_start = conf_lock->offset;
1045                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1046                 flock->fl_pid = conf_lock->pid;
1047                 if (conf_lock->type & server->vals->shared_lock_type)
1048                         flock->fl_type = F_RDLCK;
1049                 else
1050                         flock->fl_type = F_WRLCK;
1051         } else if (!cinode->can_cache_brlcks)
1052                 rc = 1;
1053         else
1054                 flock->fl_type = F_UNLCK;
1055
1056         up_read(&cinode->lock_sem);
1057         return rc;
1058 }
1059
1060 static void
1061 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1062 {
1063         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1064         cifs_down_write(&cinode->lock_sem);
1065         list_add_tail(&lock->llist, &cfile->llist->locks);
1066         up_write(&cinode->lock_sem);
1067 }
1068
1069 /*
1070  * Set the byte-range lock (mandatory style). Returns:
1071  * 1) 0, if we set the lock and don't need to request to the server;
1072  * 2) 1, if no locks prevent us but we need to request to the server;
1073  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1074  */
1075 static int
1076 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1077                  bool wait)
1078 {
1079         struct cifsLockInfo *conf_lock;
1080         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1081         bool exist;
1082         int rc = 0;
1083
1084 try_again:
1085         exist = false;
1086         cifs_down_write(&cinode->lock_sem);
1087
1088         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1089                                         lock->type, lock->flags, &conf_lock,
1090                                         CIFS_LOCK_OP);
1091         if (!exist && cinode->can_cache_brlcks) {
1092                 list_add_tail(&lock->llist, &cfile->llist->locks);
1093                 up_write(&cinode->lock_sem);
1094                 return rc;
1095         }
1096
1097         if (!exist)
1098                 rc = 1;
1099         else if (!wait)
1100                 rc = -EACCES;
1101         else {
1102                 list_add_tail(&lock->blist, &conf_lock->blist);
1103                 up_write(&cinode->lock_sem);
1104                 rc = wait_event_interruptible(lock->block_q,
1105                                         (lock->blist.prev == &lock->blist) &&
1106                                         (lock->blist.next == &lock->blist));
1107                 if (!rc)
1108                         goto try_again;
1109                 cifs_down_write(&cinode->lock_sem);
1110                 list_del_init(&lock->blist);
1111         }
1112
1113         up_write(&cinode->lock_sem);
1114         return rc;
1115 }
1116
1117 /*
1118  * Check if there is another lock that prevents us to set the lock (posix
1119  * style). If such a lock exists, update the flock structure with its
1120  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1121  * or leave it the same if we can't. Returns 0 if we don't need to request to
1122  * the server or 1 otherwise.
1123  */
1124 static int
1125 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1126 {
1127         int rc = 0;
1128         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1129         unsigned char saved_type = flock->fl_type;
1130
1131         if ((flock->fl_flags & FL_POSIX) == 0)
1132                 return 1;
1133
1134         down_read(&cinode->lock_sem);
1135         posix_test_lock(file, flock);
1136
1137         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1138                 flock->fl_type = saved_type;
1139                 rc = 1;
1140         }
1141
1142         up_read(&cinode->lock_sem);
1143         return rc;
1144 }
1145
1146 /*
1147  * Set the byte-range lock (posix style). Returns:
1148  * 1) 0, if we set the lock and don't need to request to the server;
1149  * 2) 1, if we need to request to the server;
1150  * 3) <0, if the error occurs while setting the lock.
1151  */
1152 static int
1153 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1154 {
1155         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1156         int rc = 1;
1157
1158         if ((flock->fl_flags & FL_POSIX) == 0)
1159                 return rc;
1160
1161 try_again:
1162         cifs_down_write(&cinode->lock_sem);
1163         if (!cinode->can_cache_brlcks) {
1164                 up_write(&cinode->lock_sem);
1165                 return rc;
1166         }
1167
1168         rc = posix_lock_file(file, flock, NULL);
1169         up_write(&cinode->lock_sem);
1170         if (rc == FILE_LOCK_DEFERRED) {
1171                 rc = wait_event_interruptible(flock->fl_wait,
1172                                         list_empty(&flock->fl_blocked_member));
1173                 if (!rc)
1174                         goto try_again;
1175                 locks_delete_block(flock);
1176         }
1177         return rc;
1178 }
1179
/*
 * Push all cached mandatory byte-range locks for this fid to the server,
 * batching as many LOCKING_ANDX ranges per request as the negotiated
 * buffer size allows.  Returns 0, or the last error seen while sending.
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	/* how many lock ranges fit into a single request */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	/* one pass for exclusive locks, one for shared locks */
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				/* request buffer is full - flush it */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		/* send any remaining ranges for this lock type */
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
1256
1257 static __u32
1258 hash_lockowner(fl_owner_t owner)
1259 {
1260         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1261 }
1262
/*
 * Snapshot of one cached POSIX byte-range lock, queued for pushing to the
 * server by cifs_push_posix_locks() when lock caching must be disabled.
 */
struct lock_to_push {
	struct list_head llist;	/* entry in the locks_to_send list */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* hashed lock owner (see hash_lockowner) */
	__u16 netfid;		/* SMB file handle the lock belongs to */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1271
1272 static int
1273 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1274 {
1275         struct inode *inode = d_inode(cfile->dentry);
1276         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1277         struct file_lock *flock;
1278         struct file_lock_context *flctx = inode->i_flctx;
1279         unsigned int count = 0, i;
1280         int rc = 0, xid, type;
1281         struct list_head locks_to_send, *el;
1282         struct lock_to_push *lck, *tmp;
1283         __u64 length;
1284
1285         xid = get_xid();
1286
1287         if (!flctx)
1288                 goto out;
1289
1290         spin_lock(&flctx->flc_lock);
1291         list_for_each(el, &flctx->flc_posix) {
1292                 count++;
1293         }
1294         spin_unlock(&flctx->flc_lock);
1295
1296         INIT_LIST_HEAD(&locks_to_send);
1297
1298         /*
1299          * Allocating count locks is enough because no FL_POSIX locks can be
1300          * added to the list while we are holding cinode->lock_sem that
1301          * protects locking operations of this inode.
1302          */
1303         for (i = 0; i < count; i++) {
1304                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1305                 if (!lck) {
1306                         rc = -ENOMEM;
1307                         goto err_out;
1308                 }
1309                 list_add_tail(&lck->llist, &locks_to_send);
1310         }
1311
1312         el = locks_to_send.next;
1313         spin_lock(&flctx->flc_lock);
1314         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1315                 if (el == &locks_to_send) {
1316                         /*
1317                          * The list ended. We don't have enough allocated
1318                          * structures - something is really wrong.
1319                          */
1320                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1321                         break;
1322                 }
1323                 length = 1 + flock->fl_end - flock->fl_start;
1324                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1325                         type = CIFS_RDLCK;
1326                 else
1327                         type = CIFS_WRLCK;
1328                 lck = list_entry(el, struct lock_to_push, llist);
1329                 lck->pid = hash_lockowner(flock->fl_owner);
1330                 lck->netfid = cfile->fid.netfid;
1331                 lck->length = length;
1332                 lck->type = type;
1333                 lck->offset = flock->fl_start;
1334         }
1335         spin_unlock(&flctx->flc_lock);
1336
1337         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1338                 int stored_rc;
1339
1340                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1341                                              lck->offset, lck->length, NULL,
1342                                              lck->type, 0);
1343                 if (stored_rc)
1344                         rc = stored_rc;
1345                 list_del(&lck->llist);
1346                 kfree(lck);
1347         }
1348
1349 out:
1350         free_xid(xid);
1351         return rc;
1352 err_out:
1353         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1354                 list_del(&lck->llist);
1355                 kfree(lck);
1356         }
1357         goto out;
1358 }
1359
1360 static int
1361 cifs_push_locks(struct cifsFileInfo *cfile)
1362 {
1363         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1364         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1365         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1366         int rc = 0;
1367
1368         /* we are going to update can_cache_brlcks here - need a write access */
1369         cifs_down_write(&cinode->lock_sem);
1370         if (!cinode->can_cache_brlcks) {
1371                 up_write(&cinode->lock_sem);
1372                 return rc;
1373         }
1374
1375         if (cap_unix(tcon->ses) &&
1376             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1377             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1378                 rc = cifs_push_posix_locks(cfile);
1379         else
1380                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1381
1382         cinode->can_cache_brlcks = false;
1383         up_write(&cinode->lock_sem);
1384         return rc;
1385 }
1386
1387 static void
1388 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1389                 bool *wait_flag, struct TCP_Server_Info *server)
1390 {
1391         if (flock->fl_flags & FL_POSIX)
1392                 cifs_dbg(FYI, "Posix\n");
1393         if (flock->fl_flags & FL_FLOCK)
1394                 cifs_dbg(FYI, "Flock\n");
1395         if (flock->fl_flags & FL_SLEEP) {
1396                 cifs_dbg(FYI, "Blocking lock\n");
1397                 *wait_flag = true;
1398         }
1399         if (flock->fl_flags & FL_ACCESS)
1400                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1401         if (flock->fl_flags & FL_LEASE)
1402                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1403         if (flock->fl_flags &
1404             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1405                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1406                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1407
1408         *type = server->vals->large_lock_type;
1409         if (flock->fl_type == F_WRLCK) {
1410                 cifs_dbg(FYI, "F_WRLCK\n");
1411                 *type |= server->vals->exclusive_lock_type;
1412                 *lock = 1;
1413         } else if (flock->fl_type == F_UNLCK) {
1414                 cifs_dbg(FYI, "F_UNLCK\n");
1415                 *type |= server->vals->unlock_lock_type;
1416                 *unlock = 1;
1417                 /* Check if unlock includes more than one lock range */
1418         } else if (flock->fl_type == F_RDLCK) {
1419                 cifs_dbg(FYI, "F_RDLCK\n");
1420                 *type |= server->vals->shared_lock_type;
1421                 *lock = 1;
1422         } else if (flock->fl_type == F_EXLCK) {
1423                 cifs_dbg(FYI, "F_EXLCK\n");
1424                 *type |= server->vals->exclusive_lock_type;
1425                 *lock = 1;
1426         } else if (flock->fl_type == F_SHLCK) {
1427                 cifs_dbg(FYI, "F_SHLCK\n");
1428                 *type |= server->vals->shared_lock_type;
1429                 *lock = 1;
1430         } else
1431                 cifs_dbg(FYI, "Unknown type of lock\n");
1432 }
1433
/*
 * Handle an F_GETLK-style lock test.  First consults the local lock cache;
 * if that is inconclusive, probes the server by briefly acquiring and then
 * releasing the range, reporting the result through @flock.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0 means the cached locks answered the query */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	/* mandatory style: rc == 0 means the cache answered the query */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	/* probe: try to take the requested lock on the server */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		/* it succeeded, so the range is free - undo the probe lock */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		/* a shared probe failed - an exclusive lock must be held */
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed - check whether a shared lock would fit */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1502
1503 void
1504 cifs_move_llist(struct list_head *source, struct list_head *dest)
1505 {
1506         struct list_head *li, *tmp;
1507         list_for_each_safe(li, tmp, source)
1508                 list_move(li, dest);
1509 }
1510
1511 void
1512 cifs_free_llist(struct list_head *llist)
1513 {
1514         struct cifsLockInfo *li, *tmp;
1515         list_for_each_entry_safe(li, tmp, llist, llist) {
1516                 cifs_del_lock_waiters(li);
1517                 list_del(&li->llist);
1518                 kfree(li);
1519         }
1520 }
1521
/*
 * Process an F_UNLCK request: drop every cached lock of the current thread
 * group that is fully contained in the unlock range.  If caching is off,
 * the matching ranges are batched into LOCKING_ANDX unlock requests; locks
 * are parked on a temporary list so they can be restored if the server
 * rejects the request.  Returns 0 or the last server error.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	/* how many lock ranges fit in a single unlock request */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	/* one pass for exclusive locks, one for shared locks */
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			/* only locks fully contained in the unlock range */
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				/* request buffer is full - flush it */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeed -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			/* flush the final partial batch for this lock type */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
1634
/*
 * Apply (@lock) or remove (@unlock) the byte-range lock described by @flock
 * on @file.  POSIX-capable mounts go through CIFSSMBPosixLock(); otherwise a
 * mandatory brlock is staged locally first and only committed to the file's
 * lock list once the server has accepted it.  Returns 0 or a negative errno.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	/* fl_end is inclusive, hence the +1 */
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		/* only a positive rc falls through to the server request */
		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		/* rc == 0 here means the lock was handled locally */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* server accepted the lock - record it locally */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		/* mirror the result in the local VFS lock table */
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
1728
1729 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1730 {
1731         int rc, xid;
1732         int lock = 0, unlock = 0;
1733         bool wait_flag = false;
1734         bool posix_lck = false;
1735         struct cifs_sb_info *cifs_sb;
1736         struct cifs_tcon *tcon;
1737         struct cifsFileInfo *cfile;
1738         __u32 type;
1739
1740         rc = -EACCES;
1741         xid = get_xid();
1742
1743         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1744                  cmd, flock->fl_flags, flock->fl_type,
1745                  flock->fl_start, flock->fl_end);
1746
1747         cfile = (struct cifsFileInfo *)file->private_data;
1748         tcon = tlink_tcon(cfile->tlink);
1749
1750         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1751                         tcon->ses->server);
1752         cifs_sb = CIFS_FILE_SB(file);
1753
1754         if (cap_unix(tcon->ses) &&
1755             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1756             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1757                 posix_lck = true;
1758         /*
1759          * BB add code here to normalize offset and length to account for
1760          * negative length which we can not accept over the wire.
1761          */
1762         if (IS_GETLK(cmd)) {
1763                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1764                 free_xid(xid);
1765                 return rc;
1766         }
1767
1768         if (!lock && !unlock) {
1769                 /*
1770                  * if no lock or unlock then nothing to do since we do not
1771                  * know what it is
1772                  */
1773                 free_xid(xid);
1774                 return -EOPNOTSUPP;
1775         }
1776
1777         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1778                         xid);
1779         free_xid(xid);
1780         return rc;
1781 }
1782
1783 /*
1784  * update the file size (if needed) after a write. Should be called with
1785  * the inode->i_lock held
1786  */
1787 void
1788 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1789                       unsigned int bytes_written)
1790 {
1791         loff_t end_of_write = offset + bytes_written;
1792
1793         if (end_of_write > cifsi->server_eof)
1794                 cifsi->server_eof = end_of_write;
1795 }
1796
/*
 * Synchronously write @write_size bytes from @write_data to the file at
 * *@offset on behalf of @pid, retrying on -EAGAIN and reopening the handle
 * if it went stale.  Advances *@offset past the bytes written and updates
 * the cached server EOF and in-core i_size.  Returns the number of bytes
 * written (possibly short), or a negative errno if nothing was written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* cap each request at the server's retry size */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* report the partial write if we made any progress */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		/* extend the in-core file size if the write went past it */
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1878
1879 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1880                                         bool fsuid_only)
1881 {
1882         struct cifsFileInfo *open_file = NULL;
1883         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1884
1885         /* only filter by fsuid on multiuser mounts */
1886         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1887                 fsuid_only = false;
1888
1889         spin_lock(&cifs_inode->open_file_lock);
1890         /* we could simply get the first_list_entry since write-only entries
1891            are always at the end of the list but since the first entry might
1892            have a close pending, we go through the whole list */
1893         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1894                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1895                         continue;
1896                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1897                         if (!open_file->invalidHandle) {
1898                                 /* found a good file */
1899                                 /* lock it so it will not be closed on us */
1900                                 cifsFileInfo_get(open_file);
1901                                 spin_unlock(&cifs_inode->open_file_lock);
1902                                 return open_file;
1903                         } /* else might as well continue, and look for
1904                              another, or simply have the caller reopen it
1905                              again rather than trying to fix this handle */
1906                 } else /* write only file */
1907                         break; /* write only files are last so must be done */
1908         }
1909         spin_unlock(&cifs_inode->open_file_lock);
1910         return NULL;
1911 }
1912
/*
 * Find a writable handle on @cifs_inode matching the FIND_WR_* mask in
 * @flags.  Prefers valid handles owned by the current thread group, then
 * any valid handle, and finally tries to revive a stale one via reopen.
 * On success stores a referenced handle in *@ret_file and returns 0.
 * Return -EBADF if no handle is found and general rc otherwise.
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* give up after too many attempts to revive stale handles */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass only considers handles owned by this tgid */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* remember the first stale handle as fallback */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find usable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		/* take a reference before dropping the spinlock */
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		/* try to revive the stale handle (lock must not be held) */
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/* reopen failed: demote this handle and rescan the list */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
2003
2004 struct cifsFileInfo *
2005 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2006 {
2007         struct cifsFileInfo *cfile;
2008         int rc;
2009
2010         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2011         if (rc)
2012                 cifs_dbg(FYI, "couldn't find writable handle rc=%d", rc);
2013
2014         return cfile;
2015 }
2016
/*
 * Find a writable handle for the file whose tree-relative path is @name by
 * walking every file open on @tcon.  On success returns 0 with a referenced
 * handle in *@ret_file; returns -ENOENT if no open file matches, or -ENOMEM.
 */
int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
		       int flags,
		       struct cifsFileInfo **ret_file)
{
	struct list_head *tmp;
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode;
	char *full_path;

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		cfile = list_entry(tmp, struct cifsFileInfo,
			     tlist);
		/*
		 * NOTE(review): build_path_from_dentry() allocates the path
		 * buffer; if that allocation can sleep, calling it under
		 * open_file_lock (a spinlock) would be a sleep-in-atomic
		 * bug - verify its allocation flags.
		 */
		full_path = build_path_from_dentry(cfile->dentry);
		if (full_path == NULL) {
			spin_unlock(&tcon->open_file_lock);
			return -ENOMEM;
		}
		if (strcmp(full_path, name)) {
			kfree(full_path);
			continue;
		}

		kfree(full_path);
		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		/* delegate the actual writable-handle selection */
		return cifs_get_writable_file(cinode, flags, ret_file);
	}

	spin_unlock(&tcon->open_file_lock);
	return -ENOENT;
}
2052
/*
 * Find a readable handle for the file whose tree-relative path is @name by
 * walking every file open on @tcon.  On success returns 0 with a referenced
 * handle in *@ret_file; returns -ENOENT if no usable handle exists, or
 * -ENOMEM.
 */
int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
		       struct cifsFileInfo **ret_file)
{
	struct list_head *tmp;
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode;
	char *full_path;

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		cfile = list_entry(tmp, struct cifsFileInfo,
			     tlist);
		/*
		 * NOTE(review): build_path_from_dentry() allocates the path
		 * buffer; if that allocation can sleep, calling it under
		 * open_file_lock (a spinlock) would be a sleep-in-atomic
		 * bug - verify its allocation flags.
		 */
		full_path = build_path_from_dentry(cfile->dentry);
		if (full_path == NULL) {
			spin_unlock(&tcon->open_file_lock);
			return -ENOMEM;
		}
		if (strcmp(full_path, name)) {
			kfree(full_path);
			continue;
		}

		kfree(full_path);
		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		/* pick any readable handle open on the matching inode */
		*ret_file = find_readable_file(cinode, 0);
		return *ret_file ? 0 : -ENOENT;
	}

	spin_unlock(&tcon->open_file_lock);
	return -ENOENT;
}
2088
2089 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2090 {
2091         struct address_space *mapping = page->mapping;
2092         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2093         char *write_data;
2094         int rc = -EFAULT;
2095         int bytes_written = 0;
2096         struct inode *inode;
2097         struct cifsFileInfo *open_file;
2098
2099         if (!mapping || !mapping->host)
2100                 return -EFAULT;
2101
2102         inode = page->mapping->host;
2103
2104         offset += (loff_t)from;
2105         write_data = kmap(page);
2106         write_data += from;
2107
2108         if ((to > PAGE_SIZE) || (from > to)) {
2109                 kunmap(page);
2110                 return -EIO;
2111         }
2112
2113         /* racing with truncate? */
2114         if (offset > mapping->host->i_size) {
2115                 kunmap(page);
2116                 return 0; /* don't care */
2117         }
2118
2119         /* check to make sure that we are not extending the file */
2120         if (mapping->host->i_size - offset < (loff_t)to)
2121                 to = (unsigned)(mapping->host->i_size - offset);
2122
2123         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2124                                     &open_file);
2125         if (!rc) {
2126                 bytes_written = cifs_write(open_file, open_file->pid,
2127                                            write_data, to - from, &offset);
2128                 cifsFileInfo_put(open_file);
2129                 /* Does mm or vfs already set times? */
2130                 inode->i_atime = inode->i_mtime = current_time(inode);
2131                 if ((bytes_written > 0) && (offset))
2132                         rc = 0;
2133                 else if (bytes_written < 0)
2134                         rc = bytes_written;
2135                 else
2136                         rc = -EFAULT;
2137         } else {
2138                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2139                 if (!is_retryable_error(rc))
2140                         rc = -EIO;
2141         }
2142
2143         kunmap(page);
2144         return rc;
2145 }
2146
2147 static struct cifs_writedata *
2148 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2149                           pgoff_t end, pgoff_t *index,
2150                           unsigned int *found_pages)
2151 {
2152         struct cifs_writedata *wdata;
2153
2154         wdata = cifs_writedata_alloc((unsigned int)tofind,
2155                                      cifs_writev_complete);
2156         if (!wdata)
2157                 return NULL;
2158
2159         *found_pages = find_get_pages_range_tag(mapping, index, end,
2160                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2161         return wdata;
2162 }
2163
/*
 * Lock and claim a run of consecutive dirty pages out of the @found_pages
 * candidates in wdata->pages for one write request.  Each claimed page is
 * locked, has its dirty bit cleared, and is marked under writeback.  Sets
 * *@done when writeback can stop, advances *@next past the last claimed
 * page, and resets *@index when nothing was claimed so skipped pages are
 * refound.  Unclaimed candidate pages are released.  Returns the number of
 * pages claimed.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		/* block only for the first page; end the run rather than
		   sleep on any later one */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			/* page starts past EOF - nothing to write */
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2242
2243 static int
2244 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2245                  struct address_space *mapping, struct writeback_control *wbc)
2246 {
2247         int rc;
2248         struct TCP_Server_Info *server =
2249                                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2250
2251         wdata->sync_mode = wbc->sync_mode;
2252         wdata->nr_pages = nr_pages;
2253         wdata->offset = page_offset(wdata->pages[0]);
2254         wdata->pagesz = PAGE_SIZE;
2255         wdata->tailsz = min(i_size_read(mapping->host) -
2256                         page_offset(wdata->pages[nr_pages - 1]),
2257                         (loff_t)PAGE_SIZE);
2258         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2259         wdata->pid = wdata->cfile->pid;
2260
2261         rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2262         if (rc)
2263                 return rc;
2264
2265         if (wdata->cfile->invalidHandle)
2266                 rc = -EAGAIN;
2267         else
2268                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2269
2270         return rc;
2271 }
2272
2273 static int cifs_writepages(struct address_space *mapping,
2274                            struct writeback_control *wbc)
2275 {
2276         struct inode *inode = mapping->host;
2277         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2278         struct TCP_Server_Info *server;
2279         bool done = false, scanned = false, range_whole = false;
2280         pgoff_t end, index;
2281         struct cifs_writedata *wdata;
2282         struct cifsFileInfo *cfile = NULL;
2283         int rc = 0;
2284         int saved_rc = 0;
2285         unsigned int xid;
2286
2287         /*
2288          * If wsize is smaller than the page cache size, default to writing
2289          * one page at a time via cifs_writepage
2290          */
2291         if (cifs_sb->wsize < PAGE_SIZE)
2292                 return generic_writepages(mapping, wbc);
2293
2294         xid = get_xid();
2295         if (wbc->range_cyclic) {
2296                 index = mapping->writeback_index; /* Start from prev offset */
2297                 end = -1;
2298         } else {
2299                 index = wbc->range_start >> PAGE_SHIFT;
2300                 end = wbc->range_end >> PAGE_SHIFT;
2301                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2302                         range_whole = true;
2303                 scanned = true;
2304         }
2305         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2306 retry:
2307         while (!done && index <= end) {
2308                 unsigned int i, nr_pages, found_pages, wsize;
2309                 pgoff_t next = 0, tofind, saved_index = index;
2310                 struct cifs_credits credits_on_stack;
2311                 struct cifs_credits *credits = &credits_on_stack;
2312                 int get_file_rc = 0;
2313
2314                 if (cfile)
2315                         cifsFileInfo_put(cfile);
2316
2317                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2318
2319                 /* in case of an error store it to return later */
2320                 if (rc)
2321                         get_file_rc = rc;
2322
2323                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2324                                                    &wsize, credits);
2325                 if (rc != 0) {
2326                         done = true;
2327                         break;
2328                 }
2329
2330                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2331
2332                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2333                                                   &found_pages);
2334                 if (!wdata) {
2335                         rc = -ENOMEM;
2336                         done = true;
2337                         add_credits_and_wake_if(server, credits, 0);
2338                         break;
2339                 }
2340
2341                 if (found_pages == 0) {
2342                         kref_put(&wdata->refcount, cifs_writedata_release);
2343                         add_credits_and_wake_if(server, credits, 0);
2344                         break;
2345                 }
2346
2347                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2348                                                end, &index, &next, &done);
2349
2350                 /* nothing to write? */
2351                 if (nr_pages == 0) {
2352                         kref_put(&wdata->refcount, cifs_writedata_release);
2353                         add_credits_and_wake_if(server, credits, 0);
2354                         continue;
2355                 }
2356
2357                 wdata->credits = credits_on_stack;
2358                 wdata->cfile = cfile;
2359                 cfile = NULL;
2360
2361                 if (!wdata->cfile) {
2362                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2363                                  get_file_rc);
2364                         if (is_retryable_error(get_file_rc))
2365                                 rc = get_file_rc;
2366                         else
2367                                 rc = -EBADF;
2368                 } else
2369                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2370
2371                 for (i = 0; i < nr_pages; ++i)
2372                         unlock_page(wdata->pages[i]);
2373
2374                 /* send failure -- clean up the mess */
2375                 if (rc != 0) {
2376                         add_credits_and_wake_if(server, &wdata->credits, 0);
2377                         for (i = 0; i < nr_pages; ++i) {
2378                                 if (is_retryable_error(rc))
2379                                         redirty_page_for_writepage(wbc,
2380                                                            wdata->pages[i]);
2381                                 else
2382                                         SetPageError(wdata->pages[i]);
2383                                 end_page_writeback(wdata->pages[i]);
2384                                 put_page(wdata->pages[i]);
2385                         }
2386                         if (!is_retryable_error(rc))
2387                                 mapping_set_error(mapping, rc);
2388                 }
2389                 kref_put(&wdata->refcount, cifs_writedata_release);
2390
2391                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2392                         index = saved_index;
2393                         continue;
2394                 }
2395
2396                 /* Return immediately if we received a signal during writing */
2397                 if (is_interrupt_error(rc)) {
2398                         done = true;
2399                         break;
2400                 }
2401
2402                 if (rc != 0 && saved_rc == 0)
2403                         saved_rc = rc;
2404
2405                 wbc->nr_to_write -= nr_pages;
2406                 if (wbc->nr_to_write <= 0)
2407                         done = true;
2408
2409                 index = next;
2410         }
2411
2412         if (!scanned && !done) {
2413                 /*
2414                  * We hit the last page and there is more work to be done: wrap
2415                  * back to the start of the file
2416                  */
2417                 scanned = true;
2418                 index = 0;
2419                 goto retry;
2420         }
2421
2422         if (saved_rc != 0)
2423                 rc = saved_rc;
2424
2425         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2426                 mapping->writeback_index = index;
2427
2428         if (cfile)
2429                 cifsFileInfo_put(cfile);
2430         free_xid(xid);
2431         return rc;
2432 }
2433
/*
 * Write out a single page via cifs_partialpagewrite().
 *
 * Called with the page locked; the caller (cifs_writepage) unlocks it.
 * Takes an extra page reference so the page survives the I/O, and moves
 * the page dirty -> writeback before issuing the write (see the block
 * comment below for why a writepage implementation must do this or
 * redirty the page).  Returns 0 on success or a negative errno.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		/* retry -EAGAIN in place only for data-integrity sync */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		/* otherwise leave it dirty for a later writeback pass */
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		/* hard failure: record it on the mapping for fsync/close */
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}
2474
/* ->writepage: write the locked page out, then drop the page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
2481
/*
 * ->write_end: commit @copied bytes placed in @page by a buffered write.
 *
 * If the page ended up uptodate, just dirty it and let writeback push it
 * out later; otherwise write the partial range to the server synchronously
 * with cifs_write().  Extends i_size under i_lock when the write moved the
 * file position past the current size.  Unlocks and releases the page
 * reference taken at write_begin time.  Returns the number of bytes
 * committed, or a negative errno from cifs_write().
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* use the opener's pid when the mount forwards pids for r/w */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/*
	 * PageChecked marks a page write_begin left not-uptodate on purpose;
	 * it only becomes uptodate if the whole requested range was copied.
	 */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		/* cifs_write advances pos by the bytes actually written */
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		/* fully uptodate: defer the server write to writeback */
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);

	return rc;
}
2542
/*
 * Strict-cache fsync: write and wait on dirty pages for the range, drop
 * the page cache when we no longer hold a read cache of the inode, then
 * ask the server to flush its copy of the file (unless the mount disabled
 * server flushing via CIFS_MOUNT_NOSSYNC).
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = file_write_and_wait_range(file, start, end);
	if (rc)
		return rc;

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		/* no read caching rights: cached pages may be stale */
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush == NULL) {
			rc = -ENOSYS;
			goto strict_fsync_exit;
		}

		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
			/* our handle is read-only: borrow any writable one */
			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
			if (smbfile) {
				rc = server->ops->flush(xid, tcon, &smbfile->fid);
				cifsFileInfo_put(smbfile);
			} else
				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
		} else
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
	}

strict_fsync_exit:
	free_xid(xid);
	return rc;
}
2594
/*
 * Non-strict fsync: write and wait on dirty pages for the range, then ask
 * the server to flush its copy of the file (unless disabled with
 * CIFS_MOUNT_NOSSYNC).  Unlike cifs_strict_fsync() this never invalidates
 * the local page cache.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);

	rc = file_write_and_wait_range(file, start, end);
	if (rc)
		return rc;

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush == NULL) {
			rc = -ENOSYS;
			goto fsync_exit;
		}

		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
			/* our handle is read-only: borrow any writable one */
			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
			if (smbfile) {
				rc = server->ops->flush(xid, tcon, &smbfile->fid);
				cifsFileInfo_put(smbfile);
			} else
				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
		} else
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
	}

fsync_exit:
	free_xid(xid);
	return rc;
}
2637
2638 /*
2639  * As file closes, flush all cached write data for this inode checking
2640  * for write behind errors.
2641  */
2642 int cifs_flush(struct file *file, fl_owner_t id)
2643 {
2644         struct inode *inode = file_inode(file);
2645         int rc = 0;
2646
2647         if (file->f_mode & FMODE_WRITE)
2648                 rc = filemap_write_and_wait(inode->i_mapping);
2649
2650         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2651
2652         return rc;
2653 }
2654
2655 static int
2656 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2657 {
2658         int rc = 0;
2659         unsigned long i;
2660
2661         for (i = 0; i < num_pages; i++) {
2662                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2663                 if (!pages[i]) {
2664                         /*
2665                          * save number of pages we have already allocated and
2666                          * return with ENOMEM error
2667                          */
2668                         num_pages = i;
2669                         rc = -ENOMEM;
2670                         break;
2671                 }
2672         }
2673
2674         if (rc) {
2675                 for (i = 0; i < num_pages; i++)
2676                         put_page(pages[i]);
2677         }
2678         return rc;
2679 }
2680
2681 static inline
2682 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2683 {
2684         size_t num_pages;
2685         size_t clen;
2686
2687         clen = min_t(const size_t, len, wsize);
2688         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2689
2690         if (cur_len)
2691                 *cur_len = clen;
2692
2693         return num_pages;
2694 }
2695
/*
 * kref release for an uncached-write wdata: drop our reference on the aio
 * context, release the data pages held for the write, then hand off to the
 * common cifs_writedata_release() to free the wdata itself.
 */
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}
2708
2709 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2710
/*
 * Workqueue completion handler for an uncached write: advance the cached
 * server EOF and i_size under i_lock, signal anyone waiting on wdata->done,
 * then let collect_uncached_write_data() reap results for the aio context.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2730
/*
 * Copy up to *len bytes from the user iterator @from into the pages already
 * attached to @wdata.  On return, *len holds the number of bytes actually
 * copied and *num_pages the number of pages used.  Returns -EFAULT if not a
 * single byte could be copied, 0 otherwise.
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	/* convert "bytes remaining" back into "bytes copied" */
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}
2772
/*
 * Resend a wdata whose earlier send failed with a retryable error (used by
 * collect_uncached_write_data() on -EAGAIN).  Reopens an invalidated file
 * handle if needed, then waits until the server grants enough credits to
 * resend the whole wdata as one request, sleeping a second between credit
 * attempts.  Keeps retrying while the send itself returns -EAGAIN.  On
 * success the wdata is queued on @wdata_list and 0 is returned; on failure
 * our reference to the wdata is dropped.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server =
		tlink_tcon(wdata->cfile->tlink)->ses->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				/* not enough yet: give them back and retry */
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}
2835
/*
 * Split an uncached/direct write of @len bytes at @offset into wsize-sized
 * async write requests.  For direct I/O (ctx->direct_io) the user's pages
 * are pinned via iov_iter_get_pages_alloc(); otherwise the data is copied
 * into freshly allocated kernel pages.  Each successfully sent wdata is
 * appended to @wdata_list for collect_uncached_write_data() to reap; each
 * one also takes a reference on @ctx.  Returns 0, or the first error that
 * stopped the loop.
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	/* keep copies so an -EAGAIN resend can rewind the iterator */
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	unsigned int xid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = tlink_tcon(open_file->tlink)->ses->server;
	xid = get_xid();

	do {
		unsigned int wsize;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			ssize_t result;

			/* pin the user pages directly; no data copy */
			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					"direct_writev couldn't get user pages "
					"(rc=%zd) iter type %d iov_offset %zd "
					"count %zd\n",
					result, from->type,
					from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}


			/* data may start mid-page and end mid-page */
			wdata->page_offset = start;
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			/* buffered path: allocate pages and copy into them */
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits_on_stack;
		wdata->ctx = ctx;
		/* each in-flight wdata pins the aio context */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator and retry this chunk */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3002
/*
 * Reap completed uncached writes for @ctx, in list (offset) order.  Bytes
 * from successful sends are accumulated in ctx->total_len; a wdata that
 * completed with -EAGAIN is resent (whole, for direct I/O, or re-split via
 * cifs_write_from_iter() otherwise).  Once a hard error is seen no further
 * completions are waited for.  When every wdata is accounted for, the final
 * result is stored in ctx->rc and the iocb (async) or ctx->done (sync
 * callers) is completed.  Called both from the completion work item and
 * from __cifs_writev(); serialized by ctx->aio_mutex, and returns early if
 * another invocation already emptied the list.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	ssize_t rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&wdata->done)) {
				/* still in flight; a later completion recalls us */
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					/* reposition the iter at this wdata */
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				/* restart: list was modified while iterating */
				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3084
/*
 * Common implementation behind cifs_direct_writev() and cifs_user_writev().
 *
 * Builds a cifs_aio_ctx describing the request, kicks off the component
 * async writes via cifs_write_from_iter(), then either returns
 * -EIOCBQUEUED for an async iocb (completion delivered later through
 * ki_complete) or waits killably for the whole request.  Returns the
 * number of bytes written, or a negative errno when nothing was written.
 * @direct selects pinning the user buffer (true) vs copying it (false).
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && from->type & ITER_KVEC) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* only async iocbs get completed through ki_complete */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		/* copies the iovec so the caller's iter can be released */
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: drop our ref; completion path owns the ctx now */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report EINTR with partial progress */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3188
3189 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3190 {
3191         struct file *file = iocb->ki_filp;
3192
3193         cifs_revalidate_mapping(file->f_inode);
3194         return __cifs_writev(iocb, from, true);
3195 }
3196
/*
 * Uncached write entry point: __cifs_writev() with direct=false copies the
 * caller's data into a private iterator and sends it asynchronously to the
 * server instead of staging it in the page cache.
 */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, false);
}
3201
/*
 * Cached write used when we hold a write oplock/lease: write through the
 * generic page-cache path, but only after checking that no mandatory
 * byte-range lock conflicts with the target range.  lock_sem is held for
 * read across the check and the write so no conflicting brlock can be
 * inserted in between.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	/* Refuse the write if another handle holds a conflicting brlock. */
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* For O_(D)SYNC files, flush what we just wrote (outside the locks). */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
3237
/*
 * Strict cache mode write.  Use the cached write path only while we hold a
 * write oplock/lease (CIFS_CACHE_WRITE); otherwise send the data uncached
 * and, if we still cache reads, zap the now-stale page cache.  The writer
 * reference taken via cifs_get_writer() pairs with cifs_put_writer() at
 * "out" and keeps oplock break handling away while the write runs.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* Non-zero means we could not become a writer (e.g. oplock break). */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/*
		 * With POSIX (unix extensions) byte-range locks and no
		 * "noposixbrl" mount flag, the plain generic path is safe.
		 */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause an error with mandatory locks on
	 * these pages but not on the region from pos to pos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3287
3288 static struct cifs_readdata *
3289 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3290 {
3291         struct cifs_readdata *rdata;
3292
3293         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3294         if (rdata != NULL) {
3295                 rdata->pages = pages;
3296                 kref_init(&rdata->refcount);
3297                 INIT_LIST_HEAD(&rdata->list);
3298                 init_completion(&rdata->done);
3299                 INIT_WORK(&rdata->work, complete);
3300         }
3301
3302         return rdata;
3303 }
3304
3305 static struct cifs_readdata *
3306 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3307 {
3308         struct page **pages =
3309                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3310         struct cifs_readdata *ret = NULL;
3311
3312         if (pages) {
3313                 ret = cifs_readdata_direct_alloc(pages, complete);
3314                 if (!ret)
3315                         kfree(pages);
3316         }
3317
3318         return ret;
3319 }
3320
/*
 * kref release callback for a cifs_readdata: tear down the SMB Direct
 * memory registration (if any), drop the file reference, and free the
 * page-pointer array and the readdata itself.  Callers drop their
 * reference via kref_put(); never call this directly.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	/* pages array may be kcalloc'd or come from the caller; kvfree both */
	kvfree(rdata->pages);
	kfree(rdata);
}
3338
3339 static int
3340 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3341 {
3342         int rc = 0;
3343         struct page *page;
3344         unsigned int i;
3345
3346         for (i = 0; i < nr_pages; i++) {
3347                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3348                 if (!page) {
3349                         rc = -ENOMEM;
3350                         break;
3351                 }
3352                 rdata->pages[i] = page;
3353         }
3354
3355         if (rc) {
3356                 unsigned int nr_page_failed = i;
3357
3358                 for (i = 0; i < nr_page_failed; i++) {
3359                         put_page(rdata->pages[i]);
3360                         rdata->pages[i] = NULL;
3361                 }
3362         }
3363         return rc;
3364 }
3365
3366 static void
3367 cifs_uncached_readdata_release(struct kref *refcount)
3368 {
3369         struct cifs_readdata *rdata = container_of(refcount,
3370                                         struct cifs_readdata, refcount);
3371         unsigned int i;
3372
3373         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3374         for (i = 0; i < rdata->nr_pages; i++) {
3375                 put_page(rdata->pages[i]);
3376         }
3377         cifs_readdata_release(refcount);
3378 }
3379
3380 /**
3381  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3382  * @rdata:      the readdata response with list of pages holding data
3383  * @iter:       destination for our data
3384  *
3385  * This function copies data from a list of pages in a readdata response into
3386  * an array of iovecs. It will first calculate where the data should go
3387  * based on the info in the readdata and then copy the data into that spot.
3388  */
3389 static int
3390 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3391 {
3392         size_t remaining = rdata->got_bytes;
3393         unsigned int i;
3394
3395         for (i = 0; i < rdata->nr_pages; i++) {
3396                 struct page *page = rdata->pages[i];
3397                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3398                 size_t written;
3399
3400                 if (unlikely(iov_iter_is_pipe(iter))) {
3401                         void *addr = kmap_atomic(page);
3402
3403                         written = copy_to_iter(addr, copy, iter);
3404                         kunmap_atomic(addr);
3405                 } else
3406                         written = copy_page_to_iter(page, 0, copy, iter);
3407                 remaining -= written;
3408                 if (written < copy && iov_iter_count(iter) > 0)
3409                         break;
3410         }
3411         return remaining ? -EFAULT : 0;
3412 }
3413
3414 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3415
/*
 * Work handler run when an uncached async read finishes: mark this rdata
 * done, then try to collect all completed requests of its aio context.
 * Ordering matters: the final kref_put must come last.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3427
3428 static int
3429 uncached_fill_pages(struct TCP_Server_Info *server,
3430                     struct cifs_readdata *rdata, struct iov_iter *iter,
3431                     unsigned int len)
3432 {
3433         int result = 0;
3434         unsigned int i;
3435         unsigned int nr_pages = rdata->nr_pages;
3436         unsigned int page_offset = rdata->page_offset;
3437
3438         rdata->got_bytes = 0;
3439         rdata->tailsz = PAGE_SIZE;
3440         for (i = 0; i < nr_pages; i++) {
3441                 struct page *page = rdata->pages[i];
3442                 size_t n;
3443                 unsigned int segment_size = rdata->pagesz;
3444
3445                 if (i == 0)
3446                         segment_size -= page_offset;
3447                 else
3448                         page_offset = 0;
3449
3450
3451                 if (len <= 0) {
3452                         /* no need to hold page hostage */
3453                         rdata->pages[i] = NULL;
3454                         rdata->nr_pages--;
3455                         put_page(page);
3456                         continue;
3457                 }
3458
3459                 n = len;
3460                 if (len >= segment_size)
3461                         /* enough data to fill the page */
3462                         n = segment_size;
3463                 else
3464                         rdata->tailsz = len;
3465                 len -= n;
3466
3467                 if (iter)
3468                         result = copy_page_from_iter(
3469                                         page, page_offset, n, iter);
3470 #ifdef CONFIG_CIFS_SMB_DIRECT
3471                 else if (rdata->mr)
3472                         result = n;
3473 #endif
3474                 else
3475                         result = cifs_read_page_from_socket(
3476                                         server, page, page_offset, n);
3477                 if (result < 0)
3478                         break;
3479
3480                 rdata->got_bytes += result;
3481         }
3482
3483         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3484                                                 rdata->got_bytes : result;
3485 }
3486
/*
 * ->read_into_pages hook for uncached reads: receive up to @len bytes from
 * the server socket straight into rdata's pages (iter == NULL path).
 */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}
3493
/*
 * ->copy_into_pages hook for uncached reads: copy the already-received
 * payload from @iter into rdata's pages.
 */
static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}
3501
/*
 * Resend a whole rdata after a retryable failure (e.g. reconnect).  Reopens
 * the file handle if it became invalid, waits (sleeping in 1s steps) until
 * the server grants enough credits for the full request, and re-issues the
 * async read.  On success the rdata is put back on @rdata_list and 0 is
 * returned; on permanent failure our reference to the rdata is dropped.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server =
		tlink_tcon(rdata->cfile->tlink)->ses->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			/* not enough yet: give the credits back and retry */
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}
3563
/*
 * Split the [offset, offset+len) read into credit-sized chunks and issue
 * an async read request for each, appending every successfully sent rdata
 * to @rdata_list.  For direct I/O the user pages are pinned via a private
 * copy of ctx->iter (@direct_iov); for the copy path, pages are allocated
 * here and the data is copied out later by cifs_readdata_to_iov().
 * Each queued rdata holds a reference on @ctx.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = tlink_tcon(open_file->tlink)->ses->server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* skip the part of the iter that earlier calls already consumed */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/* chunk size is bounded by the credits we were granted */
		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					"couldn't get user pages (rc=%zd)"
					" iter type %d"
					" iov_offset %zd count %zd\n",
					result, direct_iov.type,
					direct_iov.iov_offset,
					direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			/* may have pinned fewer bytes than requested */
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		/* each in-flight rdata pins the aio context */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* rewind the iter so the chunk is redone */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3706
/*
 * Gather the results of all completed rdatas on ctx->list, copying data to
 * the destination iterator (copy path only) and resending requests that
 * failed with -EAGAIN.  Called both from the issuing task and from read
 * completion work items; aio_mutex serializes them.  If any rdata is still
 * pending we return early and a later completion re-enters.  When the list
 * drains we record ctx->rc and signal the waiter (or complete the aio iocb).
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* still in flight: a later completion will re-enter */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				/* restart: the list was modified under us */
				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* copy path: the iter itself tracks how much was actually copied */
	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3811
/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv().
 * Builds a refcounted aio context, fans the read out into async requests
 * via cifs_send_async_read(), then either returns -EIOCBQUEUED (async
 * iocb) or waits for all requests to complete.  With @direct the caller's
 * pages are used in place; otherwise data is gathered into ctx pages and
 * copied out on collection.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && to->type & ITER_KVEC) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed pages must be marked dirty after we write into them */
	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* flush dirty cached pages so the direct read sees current data */
	if (direct) {
		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
						  offset, offset + len - 1);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -EAGAIN;
		}
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* async iocb: completion is reported via ki_complete later */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	/* partial success wins over any error code */
	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3923
/* Direct (zero-copy) read entry point: reads into the caller's pages. */
ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}
3928
/* Uncached read entry point: data is gathered then copied to the caller. */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}
3933
/*
 * Read for cache=strict mounts: use the cached (generic) read path only when
 * we hold at least a read-caching oplock/lease and no mandatory byte-range
 * lock conflicts with the requested range; otherwise go to the server.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/*
	 * With unix extensions' fcntl lock capability and no "noposixbrl"
	 * mount flag, the cached read is used without a brlock conflict
	 * check.
	 */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
3973
/*
 * Synchronous, uncached read: reads up to read_size bytes at *offset into
 * read_data in rsize-limited chunks, advancing *offset as data arrives.
 * Returns the total number of bytes read; if the very first chunk fails or
 * reads nothing, returns that chunk's result instead (negative error, or 0
 * at EOF).
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* with rwpidforward, send the pid the file was opened with */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* -EAGAIN from sync_read means the session reconnected;
		   reopen the handle if needed and retry this chunk */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* error or EOF: keep what we already have, if any */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			/* NOTE(review): stats are fed the running total here,
			   not bytes_read — matches historical behaviour */
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
4064
4065 /*
4066  * If the page is mmap'ed into a process' page tables, then we need to make
4067  * sure that it doesn't change while being written back.
4068  */
4069 static vm_fault_t
4070 cifs_page_mkwrite(struct vm_fault *vmf)
4071 {
4072         struct page *page = vmf->page;
4073
4074         lock_page(page);
4075         return VM_FAULT_LOCKED;
4076 }
4077
/* VMA operations for cifs mmaps: generic fault paths plus a mkwrite hook. */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
4083
/*
 * mmap for cache=strict mounts: without a read-caching oplock the local page
 * cache is zapped first, so the mapping starts from server data.
 */
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int xid, rc = 0;
	struct inode *inode = file_inode(file);

	xid = get_xid();

	if (!CIFS_CACHE_READ(CIFS_I(inode)))
		rc = cifs_zap_mapping(inode);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}
4101
4102 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4103 {
4104         int rc, xid;
4105
4106         xid = get_xid();
4107
4108         rc = cifs_revalidate_file(file);
4109         if (rc)
4110                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4111                          rc);
4112         if (!rc)
4113                 rc = generic_file_mmap(file, vma);
4114         if (!rc)
4115                 vma->vm_ops = &cifs_file_vm_ops;
4116
4117         free_xid(xid);
4118         return rc;
4119 }
4120
4121 static void
4122 cifs_readv_complete(struct work_struct *work)
4123 {
4124         unsigned int i, got_bytes;
4125         struct cifs_readdata *rdata = container_of(work,
4126                                                 struct cifs_readdata, work);
4127
4128         got_bytes = rdata->got_bytes;
4129         for (i = 0; i < rdata->nr_pages; i++) {
4130                 struct page *page = rdata->pages[i];
4131
4132                 lru_cache_add_file(page);
4133
4134                 if (rdata->result == 0 ||
4135                     (rdata->result == -EAGAIN && got_bytes)) {
4136                         flush_dcache_page(page);
4137                         SetPageUptodate(page);
4138                 }
4139
4140                 unlock_page(page);
4141
4142                 if (rdata->result == 0 ||
4143                     (rdata->result == -EAGAIN && got_bytes))
4144                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4145
4146                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4147
4148                 put_page(page);
4149                 rdata->pages[i] = NULL;
4150         }
4151         kref_put(&rdata->refcount, cifs_readdata_release);
4152 }
4153
/*
 * Fill rdata->pages with up to len bytes, either copied from iter (when
 * supplied) or read from the server socket.  A trailing partial page is
 * zero-padded; pages wholly past len or past the server's EOF are dropped
 * from rdata (EOF pages are zero-filled and marked up to date first).
 * Returns the number of bytes placed into the pages, or a negative error
 * if nothing was transferred (or the connection was aborted).
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page may start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		/* smbdirect: data was placed by RDMA; just account for it */
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
4239
/* Socket-read variant: fill pages straight from the server connection. */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
4246
/* Copy variant: fill pages from an already-received buffer via iter. */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
4254
/*
 * Peel a run of index-contiguous pages (up to rsize bytes) off the tail of
 * page_list, insert each into the page cache locked, and collect them on
 * tmplist.  *offset, *bytes and *nr_pages describe the resulting batch.
 * May return nonzero even when some pages were gathered (the caller checks
 * *nr_pages, not just the return code).
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4314
/*
 * ->readpages() for cifs: after giving fscache a chance to satisfy the
 * request, batch contiguous pages from page_list into rsize-sized async
 * read requests.  cifs_readv_complete() later unlocks and releases the
 * pages of each request.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		free_xid(xid);
		return rc;
	}

	/* with rwpidforward, send the pid the file was opened with */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* reserve credits; rsize is capped to what they allow */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			/* on failure: return credits, release pages here */
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our ref; completion work holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4468
4469 /*
4470  * cifs_readpage_worker must be called with the page pinned
4471  */
4472 static int cifs_readpage_worker(struct file *file, struct page *page,
4473         loff_t *poffset)
4474 {
4475         char *read_data;
4476         int rc;
4477
4478         /* Is the page cached? */
4479         rc = cifs_readpage_from_fscache(file_inode(file), page);
4480         if (rc == 0)
4481                 goto read_complete;
4482
4483         read_data = kmap(page);
4484         /* for reads over a certain size could initiate async read ahead */
4485
4486         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4487
4488         if (rc < 0)
4489                 goto io_error;
4490         else
4491                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4492
4493         /* we do not want atime to be less than mtime, it broke some apps */
4494         file_inode(file)->i_atime = current_time(file_inode(file));
4495         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4496                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4497         else
4498                 file_inode(file)->i_atime = current_time(file_inode(file));
4499
4500         if (PAGE_SIZE > rc)
4501                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4502
4503         flush_dcache_page(page);
4504         SetPageUptodate(page);
4505
4506         /* send this page to the cache */
4507         cifs_readpage_to_fscache(file_inode(file), page);
4508
4509         rc = 0;
4510
4511 io_error:
4512         kunmap(page);
4513
4514 read_complete:
4515         unlock_page(page);
4516         return rc;
4517 }
4518
4519 static int cifs_readpage(struct file *file, struct page *page)
4520 {
4521         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4522         int rc = -EACCES;
4523         unsigned int xid;
4524
4525         xid = get_xid();
4526
4527         if (file->private_data == NULL) {
4528                 rc = -EBADF;
4529                 free_xid(xid);
4530                 return rc;
4531         }
4532
4533         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4534                  page, (int)offset, (int)offset);
4535
4536         rc = cifs_readpage_worker(file, page, &offset);
4537
4538         free_xid(xid);
4539         return rc;
4540 }
4541
4542 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4543 {
4544         struct cifsFileInfo *open_file;
4545
4546         spin_lock(&cifs_inode->open_file_lock);
4547         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4548                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4549                         spin_unlock(&cifs_inode->open_file_lock);
4550                         return 1;
4551                 }
4552         }
4553         spin_unlock(&cifs_inode->open_file_lock);
4554         return 0;
4555 }
4556
4557 /* We do not want to update the file size from server for inodes
4558    open for write - to avoid races with writepage extending
4559    the file - in the future we could consider allowing
4560    refreshing the inode only on increases in the file size
4561    but this is tricky to do without racing with writebehind
4562    page caching in the current Linux kernel design */
4563 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4564 {
4565         if (!cifsInode)
4566                 return true;
4567
4568         if (is_inode_writable(cifsInode)) {
4569                 /* This inode is open for write at least once */
4570                 struct cifs_sb_info *cifs_sb;
4571
4572                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4573                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4574                         /* since no page cache to corrupt on directio
4575                         we can change size safely */
4576                         return true;
4577                 }
4578
4579                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4580                         return true;
4581
4582                 return false;
4583         } else
4584                 return true;
4585 }
4586
/*
 * ->write_begin(): pin the page that will receive the write.  The page is
 * pre-read from the server only when the write is partial, the page is not
 * already up to date, and no read oplock lets us skip the read; a short
 * write without an up-to-date page falls back to a sync write in
 * cifs_write_end().
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	/* pre-read the page at most once (oncethru), then take it as-is */
	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4663
4664 static int cifs_release_page(struct page *page, gfp_t gfp)
4665 {
4666         if (PagePrivate(page))
4667                 return 0;
4668
4669         return cifs_fscache_release_page(page, gfp);
4670 }
4671
4672 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4673                                  unsigned int length)
4674 {
4675         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4676
4677         if (offset == 0 && length == PAGE_SIZE)
4678                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4679 }
4680
/*
 * ->launder_page(): synchronously write back a dirty page that is about to
 * be invalidated, then drop any fscache copy of it.
 */
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	/* WB_SYNC_ALL writeback restricted to exactly this page's range */
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %p\n", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
4701
/*
 * Work item run when the server breaks our oplock/lease: wait out pending
 * writers, downgrade the cached caching level, flush (and possibly purge)
 * the page cache, push cached byte-range locks to the server, and finally
 * acknowledge the break unless it was cancelled (e.g. after a reconnect).
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* let in-flight writers finish before changing the caching level */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* propagate the break to any local leases on the inode */
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
4761
4762 /*
4763  * The presence of cifs_direct_io() in the address space ops vector
4764  * allowes open() O_DIRECT flags which would have failed otherwise.
4765  *
4766  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4767  * so this method should never be called.
4768  *
4769  * Direct IO is not yet supported in the cached mode. 
4770  */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME: direct I/O on cached (non-forcedirectio) mounts is not
	 * implemented yet, so reject the request outright.
	 */
	return -EINVAL;
}
4780
4781
/*
 * Address space operations used for CIFS inodes when the server's
 * response buffers are large enough to support cifs_readpages (the
 * small-buffer variant below omits it).
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,	/* stub; lets O_DIRECT opens succeed */
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
4795
4796 /*
4797  * cifs_readpages requires the server to support a buffer large enough to
4798  * contain the header plus one complete page of data.  Otherwise, we need
4799  * to leave cifs_readpages out of the address space operations.
4800  */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	/* no .readpages: server buffer cannot hold header + a full page */
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	/* NOTE(review): unlike cifs_addr_ops there is no .direct_IO here,
	 * so O_DIRECT opens would fail on small-buffer servers — looks
	 * intentional, but confirm. */
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};