GNU Linux-libre 4.19.242-gnu1
[releases.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45 #include "smbdirect.h"
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
/*
 * Open @full_path on the server using the SMB Unix/POSIX extensions.
 *
 * On success the server-assigned file id is returned through @pnetfid
 * and the granted oplock level through @poplock.  If @pinode is
 * non-NULL, the inode is created from (or refreshed with) the
 * FILE_UNIX_BASIC_INFO the server returned alongside the open.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	/* response buffer for the server's basic-info reply */
	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* the server does not apply the client's umask; do it here */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type == -1 means the server sent no usable file info back */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* existing inode: drop stale pages, then apply fresh attrs */
		cifs_revalidate_mapping(*pinode);
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174
/*
 * Open @full_path the "NT" way (no POSIX extensions): translate the VFS
 * open flags into a CIFS desired-access / disposition / create-options
 * triple, issue the open through server->ops->open, and refresh the
 * inode metadata from the open response.  The new handle is returned in
 * @fid and the granted oplock level in @oplock.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* scratch buffer for the FILE_ALL_INFO returned by the open */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* open succeeded: refresh inode attrs from the open response */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* metadata fetch failed: give the handle back to the server */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
266
267 static bool
268 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
269 {
270         struct cifs_fid_locks *cur;
271         bool has_locks = false;
272
273         down_read(&cinode->lock_sem);
274         list_for_each_entry(cur, &cinode->llist, llist) {
275                 if (!list_empty(&cur->locks)) {
276                         has_locks = true;
277                         break;
278                 }
279         }
280         up_read(&cinode->lock_sem);
281         return has_locks;
282 }
283
/*
 * Acquire @sem for writing by polling with down_write_trylock(),
 * sleeping 10ms between attempts instead of blocking outright.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
290
/*
 * Allocate and initialize the per-open-file private data for @file,
 * wire it into the per-inode and per-tcon open-file lists, and apply
 * the granted @oplock (possibly overridden by a pending open).
 *
 * Returns the new cifsFileInfo (also stored in file->private_data),
 * or NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	/* per-handle byte-range lock list, linked into cinode->llist below */
	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;	/* initial reference, dropped by cifsFileInfo_put() */
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	/* keep the superblock active while this handle exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	/* lock order: tcon->open_file_lock, then cinode->open_file_lock */
	spin_lock(&tcon->open_file_lock);
	/* a lease break may have updated the oplock while the open was in flight */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	/* set_fid may flag purge_cache, checked after the locks are dropped */
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
368
/*
 * Take an extra reference on @cifs_file under its file_info_lock and
 * return it.  Paired with cifsFileInfo_put().
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
377
/**
 * cifsFileInfo_put - release a reference of file priv data
 * @cifs_file: cifs/smb3 specific info (e.g. refcounts) for an open file
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true);
}
387
388 /**
389  * _cifsFileInfo_put - release a reference of file priv data
390  *
391  * This may involve closing the filehandle @cifs_file out on the
392  * server. Must be called without holding tcon->open_file_lock and
393  * cifs_file->file_info_lock.
394  *
395  * If @wait_for_oplock_handler is true and we are releasing the last
396  * reference, wait for any running oplock break handler of the file
397  * and cancel any pending one. If calling this function from the
398  * oplock break handler, you need to pass false.
399  *
400  */
401 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
402 {
403         struct inode *inode = d_inode(cifs_file->dentry);
404         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
405         struct TCP_Server_Info *server = tcon->ses->server;
406         struct cifsInodeInfo *cifsi = CIFS_I(inode);
407         struct super_block *sb = inode->i_sb;
408         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
409         struct cifsLockInfo *li, *tmp;
410         struct cifs_fid fid;
411         struct cifs_pending_open open;
412         bool oplock_break_cancelled;
413
414         spin_lock(&tcon->open_file_lock);
415         spin_lock(&cifsi->open_file_lock);
416         spin_lock(&cifs_file->file_info_lock);
417         if (--cifs_file->count > 0) {
418                 spin_unlock(&cifs_file->file_info_lock);
419                 spin_unlock(&cifsi->open_file_lock);
420                 spin_unlock(&tcon->open_file_lock);
421                 return;
422         }
423         spin_unlock(&cifs_file->file_info_lock);
424
425         if (server->ops->get_lease_key)
426                 server->ops->get_lease_key(inode, &fid);
427
428         /* store open in pending opens to make sure we don't miss lease break */
429         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
430
431         /* remove it from the lists */
432         list_del(&cifs_file->flist);
433         list_del(&cifs_file->tlist);
434
435         if (list_empty(&cifsi->openFileList)) {
436                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
437                          d_inode(cifs_file->dentry));
438                 /*
439                  * In strict cache mode we need invalidate mapping on the last
440                  * close  because it may cause a error when we open this file
441                  * again and get at least level II oplock.
442                  */
443                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
444                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
445                 cifs_set_oplock_level(cifsi, 0);
446         }
447
448         spin_unlock(&cifsi->open_file_lock);
449         spin_unlock(&tcon->open_file_lock);
450
451         oplock_break_cancelled = wait_oplock_handler ?
452                 cancel_work_sync(&cifs_file->oplock_break) : false;
453
454         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
455                 struct TCP_Server_Info *server = tcon->ses->server;
456                 unsigned int xid;
457
458                 xid = get_xid();
459                 if (server->ops->close)
460                         server->ops->close(xid, tcon, &cifs_file->fid);
461                 _free_xid(xid);
462         }
463
464         if (oplock_break_cancelled)
465                 cifs_done_oplock_break(cifsi);
466
467         cifs_del_pending_open(&open);
468
469         /*
470          * Delete any outstanding lock records. We'll lose them when the file
471          * is closed anyway.
472          */
473         cifs_down_write(&cifsi->lock_sem);
474         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
475                 list_del(&li->llist);
476                 cifs_del_lock_waiters(li);
477                 kfree(li);
478         }
479         list_del(&cifs_file->llist->llist);
480         kfree(cifs_file->llist);
481         up_write(&cifsi->lock_sem);
482
483         cifs_put_tlink(cifs_file->tlink);
484         dput(cifs_file->dentry);
485         cifs_sb_deactive(sb);
486         kfree(cifs_file);
487 }
488
/*
 * VFS ->open() for regular files.  Tries a POSIX-extensions open first
 * when the tcon advertises support, falling back to the NT-style open
 * path on protocol errors, then builds the per-open private data.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* in strict cache mode, O_DIRECT opens get the direct-I/O file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	/* try the POSIX-extensions open first if the tcon supports it */
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server rejected posix open outright; stop trying */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* register the open so a concurrent lease break is not missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open and the pending-open registration */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
615
616 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
617
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 *
 * Returns 0 on success (or when locks are still cached locally) or
 * a negative errno from the push operation.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	/* posix (fcntl) locks if the server supports them, else mandatory */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
647
/*
 * Reopen an invalidated file handle (e.g. after a reconnect or an
 * expired durable handle).  If @can_flush is true, dirty pages are
 * written back and the inode metadata refreshed from the server once
 * the reopen succeeds; callers already in the writeback path must pass
 * false to avoid deadlock.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	/* fh_mutex serializes reopen attempts on this handle */
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened it */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	/* reconnect path: re-push byte-range locks lost with the session */
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
813
814 int cifs_close(struct inode *inode, struct file *file)
815 {
816         if (file->private_data != NULL) {
817                 cifsFileInfo_put(file->private_data);
818                 file->private_data = NULL;
819         }
820
821         /* return code from the ->release op is always ignored */
822         return 0;
823 }
824
/*
 * Walk every file open on @tcon and try to reopen those whose persistent
 * handles are marked invalid.  Done in two passes so cifs_reopen_file()
 * is never called while holding tcon->open_file_lock.
 */
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file;
	struct list_head *tmp;
	struct list_head *tmp1;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	/* clear the flag now; any failed reopen below sets it again */
	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles  */
	spin_lock(&tcon->open_file_lock);
	list_for_each(tmp, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		if (!open_file->invalidHandle)
			continue;
		/* take a reference so the entry survives until pass two */
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	/* second pass: perform the reopens without holding the spinlock */
	list_for_each_safe(tmp, tmp1, &tmp_list) {
		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}
860
/*
 * ->release op for directories: close the handle on the server if the
 * readdir did not run to completion, free any buffered search results,
 * and release the private data attached to @file.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		/* mark invalid before dropping the lock and issuing the close */
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	/* free any network response buffer still held by the search state */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
911
912 static struct cifsLockInfo *
913 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
914 {
915         struct cifsLockInfo *lock =
916                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
917         if (!lock)
918                 return lock;
919         lock->offset = offset;
920         lock->length = length;
921         lock->type = type;
922         lock->pid = current->tgid;
923         INIT_LIST_HEAD(&lock->blist);
924         init_waitqueue_head(&lock->block_q);
925         return lock;
926 }
927
928 void
929 cifs_del_lock_waiters(struct cifsLockInfo *lock)
930 {
931         struct cifsLockInfo *li, *tmp;
932         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
933                 list_del_init(&li->blist);
934                 wake_up(&li->block_q);
935         }
936 }
937
#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fd's cached lock list (@fdlocks) for a lock that overlaps
 * [offset, offset+length) and conflicts with a request of @type made
 * through @cfile.  Returns true and (optionally) the conflicting lock
 * via @conf_lock if a conflict is found.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* ignore cached locks that do not overlap the request */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                /* for read/write checks, our own lock on the same fid is
                   not a conflict - except a shared lock blocking a write */
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /* a shared request coexists with our own lock on the same
                   fid/owner, or with any lock of the identical type */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
973
974 bool
975 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
976                         __u8 type, struct cifsLockInfo **conf_lock,
977                         int rw_check)
978 {
979         bool rc = false;
980         struct cifs_fid_locks *cur;
981         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
982
983         list_for_each_entry(cur, &cinode->llist, llist) {
984                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
985                                                  cfile, conf_lock, rw_check);
986                 if (rc)
987                         break;
988         }
989
990         return rc;
991 }
992
993 /*
994  * Check if there is another lock that prevents us to set the lock (mandatory
995  * style). If such a lock exists, update the flock structure with its
996  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
997  * or leave it the same if we can't. Returns 0 if we don't need to request to
998  * the server or 1 otherwise.
999  */
1000 static int
1001 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1002                __u8 type, struct file_lock *flock)
1003 {
1004         int rc = 0;
1005         struct cifsLockInfo *conf_lock;
1006         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1007         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1008         bool exist;
1009
1010         down_read(&cinode->lock_sem);
1011
1012         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1013                                         &conf_lock, CIFS_LOCK_OP);
1014         if (exist) {
1015                 flock->fl_start = conf_lock->offset;
1016                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1017                 flock->fl_pid = conf_lock->pid;
1018                 if (conf_lock->type & server->vals->shared_lock_type)
1019                         flock->fl_type = F_RDLCK;
1020                 else
1021                         flock->fl_type = F_WRLCK;
1022         } else if (!cinode->can_cache_brlcks)
1023                 rc = 1;
1024         else
1025                 flock->fl_type = F_UNLCK;
1026
1027         up_read(&cinode->lock_sem);
1028         return rc;
1029 }
1030
/*
 * Append @lock to the file's cached byte-range lock list; takes the
 * inode's lock_sem for writing since the list is modified.
 */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}
1039
1040 /*
1041  * Set the byte-range lock (mandatory style). Returns:
1042  * 1) 0, if we set the lock and don't need to request to the server;
1043  * 2) 1, if no locks prevent us but we need to request to the server;
1044  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
1045  */
1046 static int
1047 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1048                  bool wait)
1049 {
1050         struct cifsLockInfo *conf_lock;
1051         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1052         bool exist;
1053         int rc = 0;
1054
1055 try_again:
1056         exist = false;
1057         cifs_down_write(&cinode->lock_sem);
1058
1059         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1060                                         lock->type, &conf_lock, CIFS_LOCK_OP);
1061         if (!exist && cinode->can_cache_brlcks) {
1062                 list_add_tail(&lock->llist, &cfile->llist->locks);
1063                 up_write(&cinode->lock_sem);
1064                 return rc;
1065         }
1066
1067         if (!exist)
1068                 rc = 1;
1069         else if (!wait)
1070                 rc = -EACCES;
1071         else {
1072                 list_add_tail(&lock->blist, &conf_lock->blist);
1073                 up_write(&cinode->lock_sem);
1074                 rc = wait_event_interruptible(lock->block_q,
1075                                         (lock->blist.prev == &lock->blist) &&
1076                                         (lock->blist.next == &lock->blist));
1077                 if (!rc)
1078                         goto try_again;
1079                 cifs_down_write(&cinode->lock_sem);
1080                 list_del_init(&lock->blist);
1081         }
1082
1083         up_write(&cinode->lock_sem);
1084         return rc;
1085 }
1086
1087 /*
1088  * Check if there is another lock that prevents us to set the lock (posix
1089  * style). If such a lock exists, update the flock structure with its
1090  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1091  * or leave it the same if we can't. Returns 0 if we don't need to request to
1092  * the server or 1 otherwise.
1093  */
1094 static int
1095 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1096 {
1097         int rc = 0;
1098         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1099         unsigned char saved_type = flock->fl_type;
1100
1101         if ((flock->fl_flags & FL_POSIX) == 0)
1102                 return 1;
1103
1104         down_read(&cinode->lock_sem);
1105         posix_test_lock(file, flock);
1106
1107         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1108                 flock->fl_type = saved_type;
1109                 rc = 1;
1110         }
1111
1112         up_read(&cinode->lock_sem);
1113         return rc;
1114 }
1115
1116 /*
1117  * Set the byte-range lock (posix style). Returns:
1118  * 1) 0, if we set the lock and don't need to request to the server;
1119  * 2) 1, if we need to request to the server;
1120  * 3) <0, if the error occurs while setting the lock.
1121  */
1122 static int
1123 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1124 {
1125         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1126         int rc = 1;
1127
1128         if ((flock->fl_flags & FL_POSIX) == 0)
1129                 return rc;
1130
1131 try_again:
1132         cifs_down_write(&cinode->lock_sem);
1133         if (!cinode->can_cache_brlcks) {
1134                 up_write(&cinode->lock_sem);
1135                 return rc;
1136         }
1137
1138         rc = posix_lock_file(file, flock, NULL);
1139         up_write(&cinode->lock_sem);
1140         if (rc == FILE_LOCK_DEFERRED) {
1141                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1142                 if (!rc)
1143                         goto try_again;
1144                 posix_unblock_lock(flock);
1145         }
1146         return rc;
1147 }
1148
/*
 * Push the file's cached mandatory byte-range locks to the server in
 * batched LOCKING_ANDX requests, one pass per lock type (exclusive,
 * then shared).  Returns 0 or the last error from cifs_lockv.
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        /* number of lock ranges that fit in one request buffer */
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        /* batch only locks matching this pass's type */
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                /* buffer full - flush this batch now */
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        /* flush the final partial batch for this type */
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
1225
1226 static __u32
1227 hash_lockowner(fl_owner_t owner)
1228 {
1229         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1230 }
1231
/*
 * Snapshot of one POSIX byte-range lock, preallocated so it can be
 * filled under flc_lock and pushed to the server afterwards
 * (see cifs_push_posix_locks).
 */
struct lock_to_push {
        struct list_head llist; /* entry in the locks_to_send list */
        __u64 offset;           /* start of the locked range */
        __u64 length;           /* number of bytes locked */
        __u32 pid;              /* hashed lock owner (hash_lockowner) */
        __u16 netfid;           /* SMB file handle the lock applies to */
        __u8 type;              /* CIFS_RDLCK or CIFS_WRLCK */
};
1240
1241 static int
1242 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1243 {
1244         struct inode *inode = d_inode(cfile->dentry);
1245         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1246         struct file_lock *flock;
1247         struct file_lock_context *flctx = inode->i_flctx;
1248         unsigned int count = 0, i;
1249         int rc = 0, xid, type;
1250         struct list_head locks_to_send, *el;
1251         struct lock_to_push *lck, *tmp;
1252         __u64 length;
1253
1254         xid = get_xid();
1255
1256         if (!flctx)
1257                 goto out;
1258
1259         spin_lock(&flctx->flc_lock);
1260         list_for_each(el, &flctx->flc_posix) {
1261                 count++;
1262         }
1263         spin_unlock(&flctx->flc_lock);
1264
1265         INIT_LIST_HEAD(&locks_to_send);
1266
1267         /*
1268          * Allocating count locks is enough because no FL_POSIX locks can be
1269          * added to the list while we are holding cinode->lock_sem that
1270          * protects locking operations of this inode.
1271          */
1272         for (i = 0; i < count; i++) {
1273                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1274                 if (!lck) {
1275                         rc = -ENOMEM;
1276                         goto err_out;
1277                 }
1278                 list_add_tail(&lck->llist, &locks_to_send);
1279         }
1280
1281         el = locks_to_send.next;
1282         spin_lock(&flctx->flc_lock);
1283         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1284                 if (el == &locks_to_send) {
1285                         /*
1286                          * The list ended. We don't have enough allocated
1287                          * structures - something is really wrong.
1288                          */
1289                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1290                         break;
1291                 }
1292                 length = 1 + flock->fl_end - flock->fl_start;
1293                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1294                         type = CIFS_RDLCK;
1295                 else
1296                         type = CIFS_WRLCK;
1297                 lck = list_entry(el, struct lock_to_push, llist);
1298                 lck->pid = hash_lockowner(flock->fl_owner);
1299                 lck->netfid = cfile->fid.netfid;
1300                 lck->length = length;
1301                 lck->type = type;
1302                 lck->offset = flock->fl_start;
1303         }
1304         spin_unlock(&flctx->flc_lock);
1305
1306         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1307                 int stored_rc;
1308
1309                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1310                                              lck->offset, lck->length, NULL,
1311                                              lck->type, 0);
1312                 if (stored_rc)
1313                         rc = stored_rc;
1314                 list_del(&lck->llist);
1315                 kfree(lck);
1316         }
1317
1318 out:
1319         free_xid(xid);
1320         return rc;
1321 err_out:
1322         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1323                 list_del(&lck->llist);
1324                 kfree(lck);
1325         }
1326         goto out;
1327 }
1328
/*
 * Push all cached byte-range locks for @cfile to the server, choosing
 * POSIX or mandatory semantics based on the server's Unix capabilities
 * and mount flags.  Clears can_cache_brlcks so subsequent lock requests
 * go straight to the server.
 */
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need a write access */
        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                /* nothing cached (or already pushed) - nothing to do */
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}
1355
1356 static void
1357 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1358                 bool *wait_flag, struct TCP_Server_Info *server)
1359 {
1360         if (flock->fl_flags & FL_POSIX)
1361                 cifs_dbg(FYI, "Posix\n");
1362         if (flock->fl_flags & FL_FLOCK)
1363                 cifs_dbg(FYI, "Flock\n");
1364         if (flock->fl_flags & FL_SLEEP) {
1365                 cifs_dbg(FYI, "Blocking lock\n");
1366                 *wait_flag = true;
1367         }
1368         if (flock->fl_flags & FL_ACCESS)
1369                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1370         if (flock->fl_flags & FL_LEASE)
1371                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1372         if (flock->fl_flags &
1373             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1374                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1375                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1376
1377         *type = server->vals->large_lock_type;
1378         if (flock->fl_type == F_WRLCK) {
1379                 cifs_dbg(FYI, "F_WRLCK\n");
1380                 *type |= server->vals->exclusive_lock_type;
1381                 *lock = 1;
1382         } else if (flock->fl_type == F_UNLCK) {
1383                 cifs_dbg(FYI, "F_UNLCK\n");
1384                 *type |= server->vals->unlock_lock_type;
1385                 *unlock = 1;
1386                 /* Check if unlock includes more than one lock range */
1387         } else if (flock->fl_type == F_RDLCK) {
1388                 cifs_dbg(FYI, "F_RDLCK\n");
1389                 *type |= server->vals->shared_lock_type;
1390                 *lock = 1;
1391         } else if (flock->fl_type == F_EXLCK) {
1392                 cifs_dbg(FYI, "F_EXLCK\n");
1393                 *type |= server->vals->exclusive_lock_type;
1394                 *lock = 1;
1395         } else if (flock->fl_type == F_SHLCK) {
1396                 cifs_dbg(FYI, "F_SHLCK\n");
1397                 *type |= server->vals->shared_lock_type;
1398                 *lock = 1;
1399         } else
1400                 cifs_dbg(FYI, "Unknown type of lock\n");
1401 }
1402
/*
 * Handle an F_GETLK-style query.  For POSIX-capable mounts ask the
 * server via CIFSSMBPosixLock; otherwise probe the range with temporary
 * mandatory lock/unlock pairs to discover what kind of lock (if any)
 * would conflict, updating @flock->fl_type with the answer.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                /* answered from the local cache if possible */
                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        /* mandatory style: first consult the local lock cache */
        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        /* probe: if we can take the lock, nothing conflicts - undo it */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        /* a shared request failed - something exclusive is in the way */
        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        /* exclusive failed - retry shared to tell F_RDLCK from F_WRLCK */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
1471
1472 void
1473 cifs_move_llist(struct list_head *source, struct list_head *dest)
1474 {
1475         struct list_head *li, *tmp;
1476         list_for_each_safe(li, tmp, source)
1477                 list_move(li, dest);
1478 }
1479
/*
 * Free every cifsLockInfo on @llist, waking any tasks still queued on
 * the locks before the structures are freed.
 */
void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}
1490
/*
 * Unlock all cached byte-range locks of @cfile that fall entirely inside
 * the range described by @flock.  Cached-only locks are simply dropped;
 * locks already known to the server are batched into LOCKING_ANDX unlock
 * requests, and restored to the file's list if a request fails.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
                return -EINVAL;

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        /* number of lock ranges that fit in one request buffer */
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        cifs_down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        /* skip locks not fully contained in the unlock range */
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        /* only this task's locks of the current type */
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                /* buffer full - send this unlock batch now */
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeed -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                if (num) {
                        /* send the final partial batch for this type */
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist,
                                                &cfile->llist->locks);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        up_write(&cinode->lock_sem);
        kfree(buf);
        return rc;
}
1603
/*
 * Apply or release a byte-range lock on behalf of cifs_lock().
 *
 * @file:      open file the lock belongs to
 * @flock:     VFS lock descriptor; fl_start/fl_end bound the range
 * @type:      server lock type already translated by cifs_read_flock()
 * @wait_flag: true if the caller may block waiting for the lock
 * @posix_lck: use the CIFS POSIX locking extension (unix mounts)
 * @lock:      nonzero for a lock request
 * @unlock:    nonzero for an unlock request
 * @xid:       transaction id for the wire operations
 *
 * Returns 0 on success or a negative errno.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	/* fl_end is inclusive, hence the +1 */
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		/*
		 * rc <= 0: resolved locally (success or hard error); only a
		 * positive rc falls through to send the request to the
		 * server.  NOTE(review): inferred from this test — confirm
		 * against cifs_posix_lock_set().
		 */
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			/* local conflict check failed - record is unused */
			kfree(lock);
			return rc;
		}
		/*
		 * rc == 0: presumably satisfied locally without contacting
		 * the server — confirm against cifs_lock_add_if().
		 */
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			/* server rejected the lock - drop the local record */
			kfree(lock);
			return rc;
		}

		/* server granted it - track the lock on the file's list */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
1696
1697 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1698 {
1699         int rc, xid;
1700         int lock = 0, unlock = 0;
1701         bool wait_flag = false;
1702         bool posix_lck = false;
1703         struct cifs_sb_info *cifs_sb;
1704         struct cifs_tcon *tcon;
1705         struct cifsInodeInfo *cinode;
1706         struct cifsFileInfo *cfile;
1707         __u16 netfid;
1708         __u32 type;
1709
1710         rc = -EACCES;
1711         xid = get_xid();
1712
1713         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1714                  cmd, flock->fl_flags, flock->fl_type,
1715                  flock->fl_start, flock->fl_end);
1716
1717         cfile = (struct cifsFileInfo *)file->private_data;
1718         tcon = tlink_tcon(cfile->tlink);
1719
1720         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1721                         tcon->ses->server);
1722
1723         cifs_sb = CIFS_FILE_SB(file);
1724         netfid = cfile->fid.netfid;
1725         cinode = CIFS_I(file_inode(file));
1726
1727         if (cap_unix(tcon->ses) &&
1728             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1729             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1730                 posix_lck = true;
1731         /*
1732          * BB add code here to normalize offset and length to account for
1733          * negative length which we can not accept over the wire.
1734          */
1735         if (IS_GETLK(cmd)) {
1736                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1737                 free_xid(xid);
1738                 return rc;
1739         }
1740
1741         if (!lock && !unlock) {
1742                 /*
1743                  * if no lock or unlock then nothing to do since we do not
1744                  * know what it is
1745                  */
1746                 free_xid(xid);
1747                 return -EOPNOTSUPP;
1748         }
1749
1750         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1751                         xid);
1752         free_xid(xid);
1753         return rc;
1754 }
1755
1756 /*
1757  * update the file size (if needed) after a write. Should be called with
1758  * the inode->i_lock held
1759  */
1760 void
1761 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1762                       unsigned int bytes_written)
1763 {
1764         loff_t end_of_write = offset + bytes_written;
1765
1766         if (end_of_write > cifsi->server_eof)
1767                 cifsi->server_eof = end_of_write;
1768 }
1769
/*
 * Synchronously write @write_size bytes from @write_data through
 * @open_file starting at *@offset, issuing as many SMB writes as needed.
 * Each chunk is capped at the server's wp_retry_size and retried on
 * -EAGAIN, reopening a stale handle first.  On progress the cached
 * server EOF and i_size are pushed forward and *@offset is advanced.
 *
 * Returns the total number of bytes written, or a negative errno if
 * nothing at all could be written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	/* loop until the whole request has been sent or a write fails */
	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* no progress on this chunk: report partial success
			   if any earlier chunk went through, else the error */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			/* i_lock protects server_eof updates */
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		/* extend i_size if the write ended beyond it */
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1854
1855 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1856                                         bool fsuid_only)
1857 {
1858         struct cifsFileInfo *open_file = NULL;
1859         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1860
1861         /* only filter by fsuid on multiuser mounts */
1862         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1863                 fsuid_only = false;
1864
1865         spin_lock(&cifs_inode->open_file_lock);
1866         /* we could simply get the first_list_entry since write-only entries
1867            are always at the end of the list but since the first entry might
1868            have a close pending, we go through the whole list */
1869         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1870                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1871                         continue;
1872                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1873                         if (!open_file->invalidHandle) {
1874                                 /* found a good file */
1875                                 /* lock it so it will not be closed on us */
1876                                 cifsFileInfo_get(open_file);
1877                                 spin_unlock(&cifs_inode->open_file_lock);
1878                                 return open_file;
1879                         } /* else might as well continue, and look for
1880                              another, or simply have the caller reopen it
1881                              again rather than trying to fix this handle */
1882                 } else /* write only file */
1883                         break; /* write only files are last so must be done */
1884         }
1885         spin_unlock(&cifs_inode->open_file_lock);
1886         return NULL;
1887 }
1888
/*
 * Find an open handle on @cifs_inode suitable for writing and take a
 * reference on it.  Preference order: a valid writable handle opened by
 * the current task group, then any valid writable handle (respecting
 * @fsuid_only on multiuser mounts).  As a last resort a stale
 * (invalidated) handle is reopened, retrying up to MAX_REOPEN_ATT
 * times.  The caller must drop the reference with cifsFileInfo_put().
 *
 * Returns a referenced cifsFileInfo or NULL.
 */
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	the VFS or MM) should not happen but we had reports of on oops (due to
	it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* give up after too many failed reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass only considers handles of the current tgid */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} else {
				/* remember the first stale handle as a
				   fallback candidate for reopening */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		/* pin the stale handle before dropping the lock */
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			/* reopen failed: push the stale handle to the list
			   tail, drop our reference and rescan without it */
			spin_lock(&cifs_inode->open_file_lock);
			list_move_tail(&inv_file->flist,
					&cifs_inode->openFileList);
			spin_unlock(&cifs_inode->open_file_lock);
			cifsFileInfo_put(inv_file);
			++refind;
			inv_file = NULL;
			/* refind_writable expects the lock to be held */
			spin_lock(&cifs_inode->open_file_lock);
			goto refind_writable;
		}
	}

	return NULL;
}
1969
/*
 * Synchronously write bytes [@from, @to) of @page back to the server
 * using any writable open handle for the owning inode.  Called from
 * cifs_writepage_locked() for single-page writeback.  The range is
 * clamped so the write never extends the file.
 *
 * Returns 0 on success (or when the page lies wholly beyond i_size,
 * e.g. racing with truncate), -EIO for a bad range or when no writable
 * handle exists, -EFAULT for a missing mapping, or a write error.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	/* sanity-check the byte range inside the page */
	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		/* NOTE(review): the "(offset)" test also fails a write that
		   ends exactly at file offset 0, leaving rc = -EFAULT */
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}
2023
2024 static struct cifs_writedata *
2025 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2026                           pgoff_t end, pgoff_t *index,
2027                           unsigned int *found_pages)
2028 {
2029         struct cifs_writedata *wdata;
2030
2031         wdata = cifs_writedata_alloc((unsigned int)tofind,
2032                                      cifs_writev_complete);
2033         if (!wdata)
2034                 return NULL;
2035
2036         *found_pages = find_get_pages_range_tag(mapping, index, end,
2037                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2038         return wdata;
2039 }
2040
/*
 * From the @found_pages candidates in @wdata->pages (collected by
 * wdata_alloc_and_fillpages()), lock and writeback-tag the leading run
 * of consecutive pages that are still dirty, still in @mapping and not
 * past @end.  The run stops at the first page that fails any check;
 * pages beyond the run are released.  Updates *@next to the index
 * following the run and may set *@done for the caller's outer loop.
 *
 * Returns the number of pages ready to be written (possibly 0).
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		/* block only for the first page of the run; later pages
		   must be acquired without waiting */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2119
/*
 * Finish filling in a cifs_writedata whose first @nr_pages pages were
 * locked and writeback-tagged by wdata_prepare_pages(), then hand it to
 * the transport's async write op.  All pages are unlocked before
 * returning regardless of outcome; the writeback bit is cleared
 * elsewhere (completion callback, or the caller's error path).
 *
 * Returns 0 if the request was dispatched, -EBADF if no writable handle
 * could be found, or the transport's error.
 */
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc = 0;
	struct TCP_Server_Info *server;
	unsigned int i;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_SIZE;
	/* the last page may straddle EOF - only send the part inside i_size */
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;

	/* drop any handle left over from a previous attempt */
	if (wdata->cfile != NULL)
		cifsFileInfo_put(wdata->cfile);
	wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
	if (!wdata->cfile) {
		cifs_dbg(VFS, "No writable handles for inode\n");
		rc = -EBADF;
	} else {
		wdata->pid = wdata->cfile->pid;
		server = tlink_tcon(wdata->cfile->tlink)->ses->server;
		rc = server->ops->async_writev(wdata, cifs_writedata_release);
	}

	for (i = 0; i < nr_pages; ++i)
		unlock_page(wdata->pages[i]);

	return rc;
}
2154
/*
 * ->writepages for cifs: gather runs of consecutive dirty pages into
 * cifs_writedata requests of at most wsize bytes each and submit them
 * through the server's async write op.  Falls back to
 * generic_writepages() (one page at a time via cifs_writepage) when
 * wsize is smaller than a page.  WB_SYNC_ALL runs retry a batch that
 * fails with -EAGAIN; other errors are remembered in saved_rc and the
 * scan continues.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	int rc = 0;
	int saved_rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize, credits;
		pgoff_t next = 0, tofind, saved_index = index;

		/* reserve send credits for up to wsize bytes */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc != 0) {
			done = true;
			break;
		}

		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			done = true;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		wdata->credits = credits;

		rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				/* retryable errors keep the pages dirty so
				   a later pass can try again */
				if (is_retryable_error(rc))
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				put_page(wdata->pages[i]);
			}
			if (!is_retryable_error(rc))
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		/* data-integrity sync must retry this batch from its start */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		/* Return immediately if we received a signal during writing */
		if (is_interrupt_error(rc)) {
			done = true;
			break;
		}

		if (rc != 0 && saved_rc == 0)
			saved_rc = rc;

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (saved_rc != 0)
		rc = saved_rc;

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}
2283
2284 static int
2285 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2286 {
2287         int rc;
2288         unsigned int xid;
2289
2290         xid = get_xid();
2291 /* BB add check for wbc flags */
2292         get_page(page);
2293         if (!PageUptodate(page))
2294                 cifs_dbg(FYI, "ppw - page not up to date\n");
2295
2296         /*
2297          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2298          *
2299          * A writepage() implementation always needs to do either this,
2300          * or re-dirty the page with "redirty_page_for_writepage()" in
2301          * the case of a failure.
2302          *
2303          * Just unlocking the page will cause the radix tree tag-bits
2304          * to fail to update with the state of the page correctly.
2305          */
2306         set_page_writeback(page);
2307 retry_write:
2308         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2309         if (is_retryable_error(rc)) {
2310                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2311                         goto retry_write;
2312                 redirty_page_for_writepage(wbc, page);
2313         } else if (rc != 0) {
2314                 SetPageError(page);
2315                 mapping_set_error(page->mapping, rc);
2316         } else {
2317                 SetPageUptodate(page);
2318         }
2319         end_page_writeback(page);
2320         put_page(page);
2321         free_xid(xid);
2322         return rc;
2323 }
2324
/* ->writepage: write the locked page, then release the page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}
2331
/*
 * ->write_end for cifs.  Commit @copied bytes that write_begin placed
 * in @page at @pos.  An up-to-date page is simply dirtied for later
 * writeback; a page that is not up to date is written synchronously
 * through this file's own handle via cifs_write().  Extends i_size if
 * the committed data ends past it.  Unlocks and releases the page.
 *
 * Returns the number of bytes committed or a negative errno.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* forward the originating pid on mounts that request it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		/* fully cached: defer to writeback */
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		/* grow i_size if the committed range extends the file */
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);

	return rc;
}
2392
/*
 * Strict cache mode fsync: flush and wait on dirty pages in [start, end],
 * zap the page cache if we no longer hold read caching (oplock/lease) so
 * later reads go to the server, then send a server-side flush for the
 * handle unless the mount set CIFS_MOUNT_NOSSYNC.
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = file_write_and_wait_range(file, start, end);
	if (rc)
		return rc;
	inode_lock(inode);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	inode_unlock(inode);
	return rc;
}
2435
/*
 * Non-strict fsync: flush and wait on dirty pages in [start, end], then
 * send a server-side flush for the handle unless the mount set
 * CIFS_MOUNT_NOSSYNC.  Unlike cifs_strict_fsync() the page cache is
 * never invalidated here.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct inode *inode = file->f_mapping->host;

	rc = file_write_and_wait_range(file, start, end);
	if (rc)
		return rc;
	inode_lock(inode);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	inode_unlock(inode);
	return rc;
}
2469
2470 /*
2471  * As file closes, flush all cached write data for this inode checking
2472  * for write behind errors.
2473  */
2474 int cifs_flush(struct file *file, fl_owner_t id)
2475 {
2476         struct inode *inode = file_inode(file);
2477         int rc = 0;
2478
2479         if (file->f_mode & FMODE_WRITE)
2480                 rc = filemap_write_and_wait(inode->i_mapping);
2481
2482         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2483
2484         return rc;
2485 }
2486
2487 static int
2488 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2489 {
2490         int rc = 0;
2491         unsigned long i;
2492
2493         for (i = 0; i < num_pages; i++) {
2494                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2495                 if (!pages[i]) {
2496                         /*
2497                          * save number of pages we have already allocated and
2498                          * return with ENOMEM error
2499                          */
2500                         num_pages = i;
2501                         rc = -ENOMEM;
2502                         break;
2503                 }
2504         }
2505
2506         if (rc) {
2507                 for (i = 0; i < num_pages; i++)
2508                         put_page(pages[i]);
2509         }
2510         return rc;
2511 }
2512
2513 static inline
2514 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2515 {
2516         size_t num_pages;
2517         size_t clen;
2518
2519         clen = min_t(const size_t, len, wsize);
2520         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2521
2522         if (cur_len)
2523                 *cur_len = clen;
2524
2525         return num_pages;
2526 }
2527
/*
 * Final kref release for an uncached writedata: drop our reference on the
 * owning aio context, release the data pages, then free the wdata itself
 * through the common cifs_writedata_release().
 */
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}
2540
2541 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2542
/*
 * Work item run when an async uncached write finishes: fold the new server
 * EOF into the cached inode size, wake any waiter on wdata->done, then let
 * the aio context collect this (and any other finished) writes.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/* i_lock guards both server_eof and i_size updates */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2562
/*
 * Copy up to *len bytes from @from into the pre-allocated pages of @wdata.
 * *len is updated to the number of bytes actually copied and, on success,
 * *num_pages to the number of pages used.  Returns 0, or -EFAULT if nothing
 * at all could be copied (e.g. a bogus user address).
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}
2604
/*
 * Build and dispatch async write requests covering @len bytes of @from,
 * starting at file offset @offset.  Each request is sized by the credits
 * the server grants (wait_mtu_credits), copies the user data into freshly
 * allocated pages, and is queued on @wdata_list; results are reaped later
 * by collect_uncached_write_data().  Returns 0 or the first non-retryable
 * error hit while sending.
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;

	/* optionally forward the opener's pid instead of ours to the server */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = tlink_tcon(open_file->tlink)->ses->server;

	do {
		unsigned int wsize, credits;

		/* blocks until the server grants credits for a write of wsize */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		nr_pages = get_numpages(wsize, len, &cur_len);
		wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
		if (rc) {
			kfree(wdata);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		num_pages = nr_pages;
		rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
		if (rc) {
			for (i = 0; i < nr_pages; i++)
				put_page(wdata->pages[i]);
			kfree(wdata);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		/*
		 * Bring nr_pages down to the number of pages we actually used,
		 * and free any pages that we didn't use.
		 */
		for ( ; nr_pages > num_pages; nr_pages--)
			put_page(wdata->pages[nr_pages - 1]);

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		wdata->credits = credits;
		wdata->ctx = ctx;
		kref_get(&ctx->refcount);

		/* reopen a stale handle before sending, then fire the writev */
		if (!wdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(wdata->cfile, false)))
			rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		if (rc) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* retryable: rewind the iterator and resend */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
2703
/*
 * Reap completed uncached write requests for @ctx in list order, resending
 * any that failed with -EAGAIN.  Called from both the issuing task and the
 * write completion work items; ctx->aio_mutex serializes them, and whichever
 * caller finds all sends complete finalizes ctx->rc and either invokes the
 * iocb completion or wakes the synchronous waiter.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	unsigned int i;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* empty list: another caller already finished the collection */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* not done yet: bail and let its completion re-enter */
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

				rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

				list_splice(&tmp_list, &ctx->list);

				kref_put(&wdata->refcount,
					 cifs_uncached_writedata_release);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	/* release the pinned pages backing the caller's iovec */
	for (i = 0; i < ctx->npages; i++)
		put_page(ctx->bv[i].bv_page);

	cifs_stats_bytes_written(tcon, ctx->total_len);
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
2784
/*
 * Uncached write entry point: split the iterator into async write requests
 * via cifs_write_from_iter() and, for synchronous iocbs, wait for them all
 * to complete.  Returns bytes written or a negative error; async iocbs get
 * -EIOCBQUEUED and are finished from the completion path.
 */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	int rc;

	/*
	 * BB - optimize the way when signing is disabled. We can drop this
	 * extra memory-to-memory copying and use iovec buffers for constructing
	 * write request.
	 */

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* only async iocbs keep a pointer for later ki_complete() */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	rc = setup_aio_ctx_iter(ctx, from, WRITE);
	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: completion path finishes the iocb; drop our ctx ref */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report EINTR plus what did complete */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
2876
/*
 * Write through the page cache while ensuring no mandatory byte-range lock
 * held by another handle conflicts with the range being written.  lock_sem
 * is held (shared) across the conflict check and the write so the brlock
 * list cannot change underneath us.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, NULL,
				     CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* honor O_SYNC/O_DSYNC after the locks are dropped */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
2912
/*
 * Strict cache mode write entry point: write through the page cache only
 * when we hold write caching rights (oplock/lease); otherwise send the
 * data uncached and, if read caching is still held, zap it because the
 * cached copy is now stale.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* blocks while an oplock break is being handled */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/*
		 * With unix extensions, POSIX fcntl caps and POSIX brlocks
		 * enabled, the fully generic cached path can be used;
		 * otherwise go through cifs_writev() to check for
		 * conflicting mandatory brlocks.
		 */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
2962
2963 static struct cifs_readdata *
2964 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
2965 {
2966         struct cifs_readdata *rdata;
2967
2968         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
2969         if (rdata != NULL) {
2970                 rdata->pages = pages;
2971                 kref_init(&rdata->refcount);
2972                 INIT_LIST_HEAD(&rdata->list);
2973                 init_completion(&rdata->done);
2974                 INIT_WORK(&rdata->work, complete);
2975         }
2976
2977         return rdata;
2978 }
2979
2980 static struct cifs_readdata *
2981 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2982 {
2983         struct page **pages =
2984                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
2985         struct cifs_readdata *ret = NULL;
2986
2987         if (pages) {
2988                 ret = cifs_readdata_direct_alloc(pages, complete);
2989                 if (!ret)
2990                         kfree(pages);
2991         }
2992
2993         return ret;
2994 }
2995
/*
 * Final kref release for a readdata: deregister any SMB Direct memory
 * registration, drop the file handle reference, and free the page array
 * plus the structure itself.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kvfree(rdata->pages);
	kfree(rdata);
}
3013
3014 static int
3015 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3016 {
3017         int rc = 0;
3018         struct page *page;
3019         unsigned int i;
3020
3021         for (i = 0; i < nr_pages; i++) {
3022                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3023                 if (!page) {
3024                         rc = -ENOMEM;
3025                         break;
3026                 }
3027                 rdata->pages[i] = page;
3028         }
3029
3030         if (rc) {
3031                 unsigned int nr_page_failed = i;
3032
3033                 for (i = 0; i < nr_page_failed; i++) {
3034                         put_page(rdata->pages[i]);
3035                         rdata->pages[i] = NULL;
3036                 }
3037         }
3038         return rc;
3039 }
3040
/*
 * Final kref release for an uncached readdata: drop the reference on the
 * owning aio context, release and clear the data pages, then free the
 * rdata through cifs_readdata_release().
 */
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
		rdata->pages[i] = NULL;
	}
	cifs_readdata_release(refcount);
}
3055
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iter:       destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
	size_t remaining = rdata->got_bytes;
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
		size_t written;

		if (unlikely(iter->type & ITER_PIPE)) {
			/*
			 * NOTE(review): pipe iterators are fed by mapping the
			 * page and copying the bytes rather than via
			 * copy_page_to_iter() — presumably to avoid the pipe
			 * taking references on these transient pages; confirm.
			 */
			void *addr = kmap_atomic(page);

			written = copy_to_iter(addr, copy, iter);
			kunmap_atomic(addr);
		} else
			written = copy_page_to_iter(page, 0, copy, iter);
		remaining -= written;
		/* stop early only if the iter still had room but rejected data */
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	/* -EFAULT if the destination could not absorb everything we received */
	return remaining ? -EFAULT : 0;
}
3089
3090 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3091
/*
 * Work item run when an async uncached read finishes: wake any waiter on
 * rdata->done, then let the aio context collect this (and any other
 * completed) reads.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3103
/*
 * Place up to @len bytes of read data into the pages of @rdata, either by
 * copying from @iter (response already buffered elsewhere) or, when @iter
 * is NULL, by reading straight from the socket.  For SMB Direct the data
 * arrives via RDMA, so the rdata->mr case only accounts the bytes.  Unused
 * trailing pages are released.  Returns the bytes placed, or a negative
 * error when nothing was placed (-ECONNABORTED always discards partials).
 */
static int
uncached_fill_pages(struct TCP_Server_Info *server,
		    struct cifs_readdata *rdata, struct iov_iter *iter,
		    unsigned int len)
{
	int result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t n;
		unsigned int segment_size = rdata->pagesz;

		/* only the first page starts at a non-zero offset */
		if (i == 0)
			segment_size -= page_offset;
		else
			page_offset = 0;


		if (len <= 0) {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		n = len;
		if (len >= segment_size)
			/* enough data to fill the page */
			n = segment_size;
		else
			rdata->tailsz = len;
		len -= n;

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
3162
/* Read @len bytes of response data for an uncached read off the socket. */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}
3169
/* Copy already-received response data for an uncached read from @iter. */
static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}
3177
/*
 * Dispatch an uncached read of [offset, offset + len) as one or more
 * asynchronous read requests.  Each chunk is bounded by the rsize the
 * server's credit mechanism grants for that call.  Every request that is
 * successfully sent is queued on @rdata_list; results are gathered later
 * by collect_uncached_read_data() through @ctx.
 *
 * Returns 0 when the whole range has been dispatched, otherwise the
 * first fatal error.  A chunk that fails with -EAGAIN is retried with
 * the same offset/len after its credits have been returned.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize, credits;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;

	server = tlink_tcon(open_file->tlink)->ses->server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	do {
		/* may block until the server grants credits for up to rsize */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);
		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);

		/* allocate a readdata struct */
		rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
		if (!rdata) {
			add_credits_and_wake_if(server, credits, 0);
			rc = -ENOMEM;
			break;
		}

		rc = cifs_read_allocate_pages(rdata, npages);
		if (rc)
			goto error;

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits;
		rdata->ctx = ctx;
		kref_get(&ctx->refcount);

		/* reopen a stale handle before sending, if necessary */
		if (!rdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(rdata->cfile, true)))
			rc = server->ops->async_readv(rdata);
error:
		if (rc) {
			/* give back unused credits and drop our reference */
			add_credits_and_wake_if(server, rdata->credits, 0);
			kref_put(&rdata->refcount,
				 cifs_uncached_readdata_release);
			if (rc == -EAGAIN)
				continue;
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3252
/*
 * Gather the results of all outstanding uncached read requests attached
 * to @ctx and copy them to the user's iterator.
 *
 * May be entered both from the issuing thread and from request
 * completion.  If any request has not completed yet
 * (try_wait_for_completion() fails) we bail out under ctx->aio_mutex;
 * the final completion will re-enter and finish the collection.
 * Requests that failed with -EAGAIN are resent via
 * cifs_send_async_read() after any partial data has been copied out.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	unsigned int i;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* an empty list means another entry already finished the work */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
						continue;
					}
				}

				/* reissue only the part that wasn't read yet */
				rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

				list_splice(&tmp_list, &ctx->list);

				kref_put(&rdata->refcount,
					 cifs_uncached_readdata_release);
				/* list changed under us -- restart the scan */
				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* release (and, for user iovecs, dirty) the pinned user pages */
	for (i = 0; i < ctx->npages; i++) {
		if (ctx->should_dirty)
			set_page_dirty(ctx->bv[i].bv_page);
		put_page(ctx->bv[i].bv_page);
	}

	ctx->total_len = ctx->len - iov_iter_count(to);

	cifs_stats_bytes_read(tcon, ctx->total_len);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	/* async iocb: complete it; sync caller: wake the waiter */
	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3352
/*
 * read_iter entry point for reads that bypass the page cache.  The
 * request is split into async reads via cifs_send_async_read(); for a
 * synchronous kiocb we wait for collection to finish, for an async one
 * we return -EIOCBQUEUED and the iocb is completed from
 * collect_uncached_read_data().
 */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	ssize_t rc;
	size_t len;
	ssize_t total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	struct cifs_aio_ctx *ctx;

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed iovec pages must be dirtied after we fill them */
	if (to->type == ITER_IOVEC)
		ctx->should_dirty = true;

	rc = setup_aio_ctx_iter(ctx, to, READ);
	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	len = ctx->len;

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* async path: completion arrives via ki_complete; drop our ref now */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR plus any bytes read */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3439
/*
 * read_iter for strict cache mode: use the page cache only while we
 * hold read caching (level II oplock) and no conflicting mandatory
 * brlock covers the requested range; otherwise fall back to reading
 * from the server via cifs_user_readv().
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/* POSIX locking: the server arbitrates locks, no local conflicts */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
3479
/*
 * Synchronous read helper (used by cifs_readpage_worker()).  Reads up
 * to @read_size bytes starting at *@offset into @read_data using the
 * server's sync_read op, looping in rsize-sized chunks and advancing
 * *@offset by the bytes actually read.  Returns the total number of
 * bytes read, or a negative error if nothing could be read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* inner loop retries the same chunk on -EAGAIN (reconnect) */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* partial success: return what we have so far */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			/*
			 * NOTE(review): stats are fed total_read before it is
			 * advanced by this chunk's bytes_read -- verify whether
			 * bytes_read was intended here.
			 */
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
3570
3571 /*
3572  * If the page is mmap'ed into a process' page tables, then we need to make
3573  * sure that it doesn't change while being written back.
3574  */
static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	/* take the page lock and keep it; VM_FAULT_LOCKED tells the VM so */
	lock_page(page);
	return VM_FAULT_LOCKED;
}
3583
/* mmap handlers: generic page cache fault paths plus our mkwrite hook */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
3589
3590 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3591 {
3592         int xid, rc = 0;
3593         struct inode *inode = file_inode(file);
3594
3595         xid = get_xid();
3596
3597         if (!CIFS_CACHE_READ(CIFS_I(inode)))
3598                 rc = cifs_zap_mapping(inode);
3599         if (!rc)
3600                 rc = generic_file_mmap(file, vma);
3601         if (!rc)
3602                 vma->vm_ops = &cifs_file_vm_ops;
3603
3604         free_xid(xid);
3605         return rc;
3606 }
3607
3608 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3609 {
3610         int rc, xid;
3611
3612         xid = get_xid();
3613
3614         rc = cifs_revalidate_file(file);
3615         if (rc)
3616                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3617                          rc);
3618         if (!rc)
3619                 rc = generic_file_mmap(file, vma);
3620         if (!rc)
3621                 vma->vm_ops = &cifs_file_vm_ops;
3622
3623         free_xid(xid);
3624         return rc;
3625 }
3626
3627 static void
3628 cifs_readv_complete(struct work_struct *work)
3629 {
3630         unsigned int i, got_bytes;
3631         struct cifs_readdata *rdata = container_of(work,
3632                                                 struct cifs_readdata, work);
3633
3634         got_bytes = rdata->got_bytes;
3635         for (i = 0; i < rdata->nr_pages; i++) {
3636                 struct page *page = rdata->pages[i];
3637
3638                 lru_cache_add_file(page);
3639
3640                 if (rdata->result == 0 ||
3641                     (rdata->result == -EAGAIN && got_bytes)) {
3642                         flush_dcache_page(page);
3643                         SetPageUptodate(page);
3644                 }
3645
3646                 unlock_page(page);
3647
3648                 if (rdata->result == 0 ||
3649                     (rdata->result == -EAGAIN && got_bytes))
3650                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3651
3652                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3653
3654                 put_page(page);
3655                 rdata->pages[i] = NULL;
3656         }
3657         kref_put(&rdata->refcount, cifs_readdata_release);
3658 }
3659
/*
 * Distribute @len bytes of read response data across the page cache
 * pages attached to @rdata, either from @iter (data already in memory)
 * or directly from the server socket.  Pages covering only part of the
 * data get their tail zeroed; pages beyond the data that are also past
 * the server's eof are zeroed and marked uptodate, while the remainder
 * are simply released.  Returns the number of bytes stored, or a
 * negative error (rdata->got_bytes is preferred unless the connection
 * aborted).
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page can start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
3745
/*
 * ->read_into_pages hook for readpages requests: fill the pages
 * directly from the server socket (no intermediate iterator).
 */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
3752
/*
 * ->copy_into_pages hook for readpages requests: the response data is
 * already in memory behind @iter, so copy from there.
 */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
3760
/*
 * Peel a contiguous run of pages off the tail of @page_list (ordered by
 * descending index), insert each into the page cache locked, and move
 * it to @tmplist.  Stops at @rsize bytes, at an index discontinuity, or
 * on a page cache insertion failure.  On return *offset, *bytes and
 * *nr_pages describe the chunk collected; the caller should check
 * *nr_pages rather than the return value, which is the last
 * add_to_page_cache_locked() result.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = list_entry(page_list->prev, struct page, lru);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
3820
/*
 * address_space_operations->readpages: populate a batch of page cache
 * pages.  Pages satisfied from fscache are consumed there; the rest are
 * grouped into rsize-bounded contiguous chunks (readpages_get_pages())
 * and issued as async reads completed by cifs_readv_complete().
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		unsigned credits;

		/* may block until the server grants credits for this read */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits;

		/* tmplist is ascending, so pages land in increasing order */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		/* reopen a stale handle before sending, if necessary */
		if (!rdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(rdata->cfile, true)))
			rc = server->ops->async_readv(rdata);
		if (rc) {
			add_credits_and_wake_if(server, rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop the local ref; completion holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	return rc;
}
3953
3954 /*
3955  * cifs_readpage_worker must be called with the page pinned
3956  */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* we fetched the data ourselves, so update atime by hand */
	file_inode(file)->i_atime =
		current_time(file_inode(file));

	/* zero the tail of a short read so no stale data is exposed */
	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	/* success path falls through here too: unmap and unlock either way */
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}
3999
4000 static int cifs_readpage(struct file *file, struct page *page)
4001 {
4002         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4003         int rc = -EACCES;
4004         unsigned int xid;
4005
4006         xid = get_xid();
4007
4008         if (file->private_data == NULL) {
4009                 rc = -EBADF;
4010                 free_xid(xid);
4011                 return rc;
4012         }
4013
4014         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4015                  page, (int)offset, (int)offset);
4016
4017         rc = cifs_readpage_worker(file, page, &offset);
4018
4019         free_xid(xid);
4020         return rc;
4021 }
4022
4023 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4024 {
4025         struct cifsFileInfo *open_file;
4026
4027         spin_lock(&cifs_inode->open_file_lock);
4028         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4029                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4030                         spin_unlock(&cifs_inode->open_file_lock);
4031                         return 1;
4032                 }
4033         }
4034         spin_unlock(&cifs_inode->open_file_lock);
4035         return 0;
4036 }
4037
4038 /* We do not want to update the file size from server for inodes
4039    open for write - to avoid races with writepage extending
4040    the file - in the future we could consider allowing
4041    refreshing the inode only on increases in the file size
4042    but this is tricky to do without racing with writebehind
4043    page caching in the current Linux kernel design */
4044 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4045 {
4046         if (!cifsInode)
4047                 return true;
4048
4049         if (is_inode_writable(cifsInode)) {
4050                 /* This inode is open for write at least once */
4051                 struct cifs_sb_info *cifs_sb;
4052
4053                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4054                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4055                         /* since no page cache to corrupt on directio
4056                         we can change size safely */
4057                         return true;
4058                 }
4059
4060                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4061                         return true;
4062
4063                 return false;
4064         } else
4065                 return true;
4066 }
4067
/*
 * ->write_begin handler: return a locked pagecache page in *pagep ready
 * for a buffered write of @len bytes at @pos, reading existing data in
 * from the server when a partial-page write needs it (read-modify-write).
 *
 * Returns 0 on success (with *pagep locked and referenced) or -ENOMEM.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;	/* limit the read-in retry below to one pass */
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);	/* write start within the page */
	loff_t page_start = pos & PAGE_MASK;	/* file offset of the page */
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			/* zero everything outside [offset, offset+len) */
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		/* the worker unlocked the page; drop our ref and re-grab it */
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4144
4145 static int cifs_release_page(struct page *page, gfp_t gfp)
4146 {
4147         if (PagePrivate(page))
4148                 return 0;
4149
4150         return cifs_fscache_release_page(page, gfp);
4151 }
4152
4153 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4154                                  unsigned int length)
4155 {
4156         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4157
4158         if (offset == 0 && length == PAGE_SIZE)
4159                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4160 }
4161
4162 static int cifs_launder_page(struct page *page)
4163 {
4164         int rc = 0;
4165         loff_t range_start = page_offset(page);
4166         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4167         struct writeback_control wbc = {
4168                 .sync_mode = WB_SYNC_ALL,
4169                 .nr_to_write = 0,
4170                 .range_start = range_start,
4171                 .range_end = range_end,
4172         };
4173
4174         cifs_dbg(FYI, "Launder page: %p\n", page);
4175
4176         if (clear_page_dirty_for_io(page))
4177                 rc = cifs_writepage_locked(page, &wbc);
4178
4179         cifs_fscache_invalidate_page(page, page->mapping->host);
4180         return rc;
4181 }
4182
/*
 * Workqueue handler run when the server breaks an oplock held on this
 * file: downgrade the cached oplock state, flush/invalidate the
 * pagecache as required by the new state, push cached byte-range locks
 * back to the server, and acknowledge the break.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* let in-flight writers finish before downgrading the oplock */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	/* protocol-specific downgrade; may request a cache purge */
	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	/* mandatory locks need write caching; without it drop the oplock */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* break any local lease matching the new caching level */
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* read caching lost: wait out the flush and drop
			   the now-untrusted pagecache */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/* still write-caching: no need to push locks to the server */
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */);
	cifs_done_oplock_break(cinode);
}
4242
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
/* Stub ->direct_IO: always fails, since cached-mode direct I/O is
 * unimplemented (see FIXME below). */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
4261
4262
/* Address space operations for the normal (page-cached) I/O path. */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	/* present only so open(O_DIRECT) succeeds; always returns -EINVAL */
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
4276
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 *
 * This variant therefore omits .readpages (and .direct_IO) but is
 * otherwise identical to cifs_addr_ops above.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};