GNU Linux-libre 6.0.2-gnu
fs/cifs/file.c
// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Mark all open files on tree connections as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->ses->ses_lock);
        if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
                spin_unlock(&tcon->ses->ses_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->ses->ses_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}

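/*
 * Map the POSIX access mode in @flags to the SMB desired-access bits
 * requested at open time; O_RDWR asks for GENERIC_READ | GENERIC_WRITE
 * rather than GENERIC_ALL (see the comment in the O_RDWR branch).
 */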
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can cause
                 * unnecessary access-denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

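/*
 * Map the O_CREAT/O_EXCL/O_TRUNC combination in @flags to the CIFS create
 * disposition (see the open flag mapping table in cifs_nt_open() below).
 */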
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
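/*
 * Open @full_path with the legacy SMB1 POSIX create call. On success the
 * granted oplock and netfid are returned through @poplock and @pnetfid; if
 * @pinode is non-NULL, the inode is instantiated (or revalidated) from the
 * FILE_UNIX_BASIC_INFO the server returned.
 */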
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

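/*
 * Open @full_path with a regular NT-style create through the dialect's
 * ->open() op, then refresh the inode from the returned metadata. If the
 * inode refresh fails, the just-opened handle is closed again and -ESTALE
 * is mapped to -EOPENSTALE so the VFS can retry the open with revalidation.
 */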
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag, and
 *      the read/write flags match reasonably.  O_LARGEFILE is irrelevant
 *      because largefile support is always used by this client.  Flags
 *      O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and
 *      O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

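/* Return true if any open fid on @cinode currently holds byte-range locks. */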
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

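/*
 * Take @sem for writing by polling down_write_trylock() with a 10ms sleep
 * between attempts instead of blocking in down_write().
 */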
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

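/*
 * Allocate and initialize the cifsFileInfo for a freshly opened handle:
 * take references on the dentry, tlink and superblock, apply the granted
 * @oplock (deferring to a pending open's oplock if one arrived first), and
 * link the new handle into the tcon's and inode's open-file lists, with
 * readable instances first on the inode's list.
 */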
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, defer the final release to a workqueue; the close
 *              and oplock break paths pass false
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close, because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we could not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

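/*
 * Reopen @cfile after its handle was invalidated (e.g. by a reconnect). If
 * @can_flush is set, dirty pages are written back and the inode metadata is
 * refreshed before the handle is reused; cached byte-range locks are then
 * reacquired via cifs_relock_file() when needed.
 */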
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Cannot grab the rename sem here, because various ops, including
         * those that already hold it, can end up causing writepage to be
         * called; and if the server was down, that means we end up here, and
         * we can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Otherwise we are already writing data out to the server and could
         * deadlock if we tried to flush it; and since we do not know whether
         * we have data that would invalidate the current end of file on the
         * server, we cannot go to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

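/*
 * Work handler for a deferred close: remove the deferred-close record and
 * drop the reference that was kept while the close was postponed, which
 * closes the handle on the server if it was the last reference.
 */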
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

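/*
 * If the inode holds a cached read/handle/write (RHW) lease, the close is
 * deferred for cifs_sb->ctx->closetimeo so that a quickly following reopen
 * can reuse the handle; otherwise the reference is dropped immediately.
 */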
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work. So, increase the ref count
                                 * to avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen persistent handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

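/* Attach @lock to the fid's lock list of @cfile under the inode's lock_sem. */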
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

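/*
 * Push the cached byte-range locks of @cfile to the server using SMB1
 * LOCKING_ANDX requests, batching up to max_num ranges per request and
 * making one pass for exclusive and one for shared lock types.
 */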
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
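/*
 * Replay all cached FL_POSIX locks of the inode to the server, one
 * CIFSSMBPosixLock call per lock; the lock owner is folded into a
 * pid-like value via hash_lockowner().
 */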
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

1433         /*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we hold cinode->lock_sem, which protects
	 * the locking operations on this inode.
1437          */
1438         for (i = 0; i < count; i++) {
1439                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1440                 if (!lck) {
1441                         rc = -ENOMEM;
1442                         goto err_out;
1443                 }
1444                 list_add_tail(&lck->llist, &locks_to_send);
1445         }
1446
1447         el = locks_to_send.next;
1448         spin_lock(&flctx->flc_lock);
1449         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1450                 if (el == &locks_to_send) {
1451                         /*
1452                          * The list ended. We don't have enough allocated
1453                          * structures - something is really wrong.
1454                          */
1455                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1456                         break;
1457                 }
1458                 length = cifs_flock_len(flock);
1459                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1460                         type = CIFS_RDLCK;
1461                 else
1462                         type = CIFS_WRLCK;
1463                 lck = list_entry(el, struct lock_to_push, llist);
1464                 lck->pid = hash_lockowner(flock->fl_owner);
1465                 lck->netfid = cfile->fid.netfid;
1466                 lck->length = length;
1467                 lck->type = type;
1468                 lck->offset = flock->fl_start;
1469         }
1470         spin_unlock(&flctx->flc_lock);
1471
1472         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1473                 int stored_rc;
1474
1475                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1476                                              lck->offset, lck->length, NULL,
1477                                              lck->type, 0);
1478                 if (stored_rc)
1479                         rc = stored_rc;
1480                 list_del(&lck->llist);
1481                 kfree(lck);
1482         }
1483
1484 out:
1485         free_xid(xid);
1486         return rc;
1487 err_out:
1488         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1489                 list_del(&lck->llist);
1490                 kfree(lck);
1491         }
1492         goto out;
1493 }
1494 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1495
1496 static int
1497 cifs_push_locks(struct cifsFileInfo *cfile)
1498 {
1499         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1500         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1501         int rc = 0;
1502 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1503         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1504 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1505
	/* we are going to update can_cache_brlcks here - need write access */
1507         cifs_down_write(&cinode->lock_sem);
1508         if (!cinode->can_cache_brlcks) {
1509                 up_write(&cinode->lock_sem);
1510                 return rc;
1511         }
1512
1513 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1514         if (cap_unix(tcon->ses) &&
1515             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1516             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1517                 rc = cifs_push_posix_locks(cfile);
1518         else
1519 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1520                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1521
1522         cinode->can_cache_brlcks = false;
1523         up_write(&cinode->lock_sem);
1524         return rc;
1525 }
1526
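/*
 * Decode a VFS file_lock for the transport: log the fl_flags, set
 * *wait_flag for blocking (FL_SLEEP) requests, and translate fl_type into
 * the server's lock-type bits plus a lock/unlock indication.
 */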
1527 static void
1528 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1529                 bool *wait_flag, struct TCP_Server_Info *server)
1530 {
1531         if (flock->fl_flags & FL_POSIX)
1532                 cifs_dbg(FYI, "Posix\n");
1533         if (flock->fl_flags & FL_FLOCK)
1534                 cifs_dbg(FYI, "Flock\n");
1535         if (flock->fl_flags & FL_SLEEP) {
1536                 cifs_dbg(FYI, "Blocking lock\n");
1537                 *wait_flag = true;
1538         }
1539         if (flock->fl_flags & FL_ACCESS)
1540                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1541         if (flock->fl_flags & FL_LEASE)
1542                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1543         if (flock->fl_flags &
1544             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1545                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1546                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1547
1548         *type = server->vals->large_lock_type;
1549         if (flock->fl_type == F_WRLCK) {
1550                 cifs_dbg(FYI, "F_WRLCK\n");
1551                 *type |= server->vals->exclusive_lock_type;
1552                 *lock = 1;
1553         } else if (flock->fl_type == F_UNLCK) {
1554                 cifs_dbg(FYI, "F_UNLCK\n");
1555                 *type |= server->vals->unlock_lock_type;
1556                 *unlock = 1;
1557                 /* Check if unlock includes more than one lock range */
1558         } else if (flock->fl_type == F_RDLCK) {
1559                 cifs_dbg(FYI, "F_RDLCK\n");
1560                 *type |= server->vals->shared_lock_type;
1561                 *lock = 1;
1562         } else if (flock->fl_type == F_EXLCK) {
1563                 cifs_dbg(FYI, "F_EXLCK\n");
1564                 *type |= server->vals->exclusive_lock_type;
1565                 *lock = 1;
1566         } else if (flock->fl_type == F_SHLCK) {
1567                 cifs_dbg(FYI, "F_SHLCK\n");
1568                 *type |= server->vals->shared_lock_type;
1569                 *lock = 1;
1570         } else
1571                 cifs_dbg(FYI, "Unknown type of lock\n");
1572 }
1573
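/*
 * Handle F_GETLK: with the POSIX extensions the range is tested with
 * cifs_posix_lock_test() and, if needed, CIFSSMBPosixLock(); otherwise we
 * probe by trying to take (and immediately undo) a mandatory lock of the
 * requested type, falling back to a shared probe to decide whether to
 * report the range as read- or write-locked.
 */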
1574 static int
1575 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1576            bool wait_flag, bool posix_lck, unsigned int xid)
1577 {
1578         int rc = 0;
1579         __u64 length = cifs_flock_len(flock);
1580         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1581         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1582         struct TCP_Server_Info *server = tcon->ses->server;
1583 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1584         __u16 netfid = cfile->fid.netfid;
1585
1586         if (posix_lck) {
1587                 int posix_lock_type;
1588
1589                 rc = cifs_posix_lock_test(file, flock);
1590                 if (!rc)
1591                         return rc;
1592
1593                 if (type & server->vals->shared_lock_type)
1594                         posix_lock_type = CIFS_RDLCK;
1595                 else
1596                         posix_lock_type = CIFS_WRLCK;
1597                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1598                                       hash_lockowner(flock->fl_owner),
1599                                       flock->fl_start, length, flock,
1600                                       posix_lock_type, wait_flag);
1601                 return rc;
1602         }
1603 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1604
1605         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1606         if (!rc)
1607                 return rc;
1608
1609         /* BB we could chain these into one lock request BB */
1610         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1611                                     1, 0, false);
1612         if (rc == 0) {
1613                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1614                                             type, 0, 1, false);
1615                 flock->fl_type = F_UNLCK;
1616                 if (rc != 0)
1617                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1618                                  rc);
1619                 return 0;
1620         }
1621
1622         if (type & server->vals->shared_lock_type) {
1623                 flock->fl_type = F_WRLCK;
1624                 return 0;
1625         }
1626
1627         type &= ~server->vals->exclusive_lock_type;
1628
1629         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1630                                     type | server->vals->shared_lock_type,
1631                                     1, 0, false);
1632         if (rc == 0) {
1633                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1634                         type | server->vals->shared_lock_type, 0, 1, false);
1635                 flock->fl_type = F_RDLCK;
1636                 if (rc != 0)
1637                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1638                                  rc);
1639         } else
1640                 flock->fl_type = F_WRLCK;
1641
1642         return 0;
1643 }
1644
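/* Move every lock entry from @source over to @dest. */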
1645 void
1646 cifs_move_llist(struct list_head *source, struct list_head *dest)
1647 {
1648         struct list_head *li, *tmp;
1649         list_for_each_safe(li, tmp, source)
1650                 list_move(li, dest);
1651 }
1652
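/* Wake all waiters and free every cifsLockInfo entry on @llist. */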
1653 void
1654 cifs_free_llist(struct list_head *llist)
1655 {
1656         struct cifsLockInfo *li, *tmp;
1657         list_for_each_entry_safe(li, tmp, llist, llist) {
1658                 cifs_del_lock_waiters(li);
1659                 list_del(&li->llist);
1660                 kfree(li);
1661         }
1662 }
1663
1664 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
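/*
 * Unlock a byte range: walk the file's cached locks that fall inside the
 * range, batch the matching entries into LOCKING_ANDX_RANGE arrays (up to
 * max_num per request) and send them as unlock requests.  While brlocks
 * can still be cached the entry is simply deleted locally; otherwise each
 * entry is parked on tmp_llist so it can be restored if the server rejects
 * the unlock, and freed once the request succeeds.
 */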
1665 int
1666 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1667                   unsigned int xid)
1668 {
1669         int rc = 0, stored_rc;
1670         static const int types[] = {
1671                 LOCKING_ANDX_LARGE_FILES,
1672                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1673         };
1674         unsigned int i;
1675         unsigned int max_num, num, max_buf;
1676         LOCKING_ANDX_RANGE *buf, *cur;
1677         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1678         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1679         struct cifsLockInfo *li, *tmp;
1680         __u64 length = cifs_flock_len(flock);
1681         struct list_head tmp_llist;
1682
1683         INIT_LIST_HEAD(&tmp_llist);
1684
1685         /*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store the
	 * value and check it before use.
1688          */
1689         max_buf = tcon->ses->server->maxBuf;
1690         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1691                 return -EINVAL;
1692
1693         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1694                      PAGE_SIZE);
1695         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1696                         PAGE_SIZE);
1697         max_num = (max_buf - sizeof(struct smb_hdr)) /
1698                                                 sizeof(LOCKING_ANDX_RANGE);
1699         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1700         if (!buf)
1701                 return -ENOMEM;
1702
1703         cifs_down_write(&cinode->lock_sem);
1704         for (i = 0; i < 2; i++) {
1705                 cur = buf;
1706                 num = 0;
1707                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1708                         if (flock->fl_start > li->offset ||
1709                             (flock->fl_start + length) <
1710                             (li->offset + li->length))
1711                                 continue;
1712                         if (current->tgid != li->pid)
1713                                 continue;
1714                         if (types[i] != li->type)
1715                                 continue;
1716                         if (cinode->can_cache_brlcks) {
1717                                 /*
1718                                  * We can cache brlock requests - simply remove
1719                                  * a lock from the file's list.
1720                                  */
1721                                 list_del(&li->llist);
1722                                 cifs_del_lock_waiters(li);
1723                                 kfree(li);
1724                                 continue;
1725                         }
1726                         cur->Pid = cpu_to_le16(li->pid);
1727                         cur->LengthLow = cpu_to_le32((u32)li->length);
1728                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1729                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1730                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1731                         /*
1732                          * We need to save a lock here to let us add it again to
1733                          * the file's list if the unlock range request fails on
1734                          * the server.
1735                          */
1736                         list_move(&li->llist, &tmp_llist);
1737                         if (++num == max_num) {
1738                                 stored_rc = cifs_lockv(xid, tcon,
1739                                                        cfile->fid.netfid,
1740                                                        li->type, num, 0, buf);
1741                                 if (stored_rc) {
1742                                         /*
1743                                          * We failed on the unlock range
1744                                          * request - add all locks from the tmp
1745                                          * list to the head of the file's list.
1746                                          */
1747                                         cifs_move_llist(&tmp_llist,
1748                                                         &cfile->llist->locks);
1749                                         rc = stored_rc;
1750                                 } else
1751                                         /*
					 * The unlock range request succeeded -
1753                                          * free the tmp list.
1754                                          */
1755                                         cifs_free_llist(&tmp_llist);
1756                                 cur = buf;
1757                                 num = 0;
1758                         } else
1759                                 cur++;
1760                 }
1761                 if (num) {
1762                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1763                                                types[i], num, 0, buf);
1764                         if (stored_rc) {
1765                                 cifs_move_llist(&tmp_llist,
1766                                                 &cfile->llist->locks);
1767                                 rc = stored_rc;
1768                         } else
1769                                 cifs_free_llist(&tmp_llist);
1770                 }
1771         }
1772
1773         up_write(&cinode->lock_sem);
1774         kfree(buf);
1775         return rc;
1776 }
1777 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1778
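/*
 * Apply a lock or unlock request.  POSIX requests go through
 * cifs_posix_lock_set() and CIFSSMBPosixLock(); mandatory lock requests
 * are first checked against (and possibly only cached in) the local lock
 * list before being sent via the server's mand_lock op, and unlocks go
 * through mand_unlock_range.
 */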
1779 static int
1780 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1781            bool wait_flag, bool posix_lck, int lock, int unlock,
1782            unsigned int xid)
1783 {
1784         int rc = 0;
1785         __u64 length = cifs_flock_len(flock);
1786         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1787         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1788         struct TCP_Server_Info *server = tcon->ses->server;
1789         struct inode *inode = d_inode(cfile->dentry);
1790
1791 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1792         if (posix_lck) {
1793                 int posix_lock_type;
1794
1795                 rc = cifs_posix_lock_set(file, flock);
1796                 if (rc <= FILE_LOCK_DEFERRED)
1797                         return rc;
1798
1799                 if (type & server->vals->shared_lock_type)
1800                         posix_lock_type = CIFS_RDLCK;
1801                 else
1802                         posix_lock_type = CIFS_WRLCK;
1803
1804                 if (unlock == 1)
1805                         posix_lock_type = CIFS_UNLCK;
1806
1807                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1808                                       hash_lockowner(flock->fl_owner),
1809                                       flock->fl_start, length,
1810                                       NULL, posix_lock_type, wait_flag);
1811                 goto out;
1812         }
1813 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1814         if (lock) {
1815                 struct cifsLockInfo *lock;
1816
1817                 lock = cifs_lock_init(flock->fl_start, length, type,
1818                                       flock->fl_flags);
1819                 if (!lock)
1820                         return -ENOMEM;
1821
1822                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1823                 if (rc < 0) {
1824                         kfree(lock);
1825                         return rc;
1826                 }
1827                 if (!rc)
1828                         goto out;
1829
1830                 /*
1831                  * Windows 7 server can delay breaking lease from read to None
1832                  * if we set a byte-range lock on a file - break it explicitly
1833                  * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
1836                  */
1837                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1838                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1839                         cifs_zap_mapping(inode);
1840                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1841                                  inode);
1842                         CIFS_I(inode)->oplock = 0;
1843                 }
1844
1845                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1846                                             type, 1, 0, wait_flag);
1847                 if (rc) {
1848                         kfree(lock);
1849                         return rc;
1850                 }
1851
1852                 cifs_lock_add(cfile, lock);
1853         } else if (unlock)
1854                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1855
1856 out:
1857         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1858                 /*
1859                  * If this is a request to remove all locks because we
1860                  * are closing the file, it doesn't matter if the
1861                  * unlocking failed as both cifs.ko and the SMB server
1862                  * remove the lock on file close
1863                  */
1864                 if (rc) {
1865                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1866                         if (!(flock->fl_flags & FL_CLOSE))
1867                                 return rc;
1868                 }
1869                 rc = locks_lock_file_wait(file, flock);
1870         }
1871         return rc;
1872 }
1873
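/*
 * flock(2) entry point: only FL_FLOCK requests are accepted; the request
 * is decoded by cifs_read_flock() and applied via cifs_setlk().
 */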
1874 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1875 {
1876         int rc, xid;
1877         int lock = 0, unlock = 0;
1878         bool wait_flag = false;
1879         bool posix_lck = false;
1880         struct cifs_sb_info *cifs_sb;
1881         struct cifs_tcon *tcon;
1882         struct cifsFileInfo *cfile;
1883         __u32 type;
1884
1885         rc = -EACCES;
1886         xid = get_xid();
1887
1888         if (!(fl->fl_flags & FL_FLOCK))
1889                 return -ENOLCK;
1890
1891         cfile = (struct cifsFileInfo *)file->private_data;
1892         tcon = tlink_tcon(cfile->tlink);
1893
1894         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1895                         tcon->ses->server);
1896         cifs_sb = CIFS_FILE_SB(file);
1897
1898         if (cap_unix(tcon->ses) &&
1899             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1900             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1901                 posix_lck = true;
1902
1903         if (!lock && !unlock) {
1904                 /*
1905                  * if no lock or unlock then nothing to do since we do not
1906                  * know what it is
1907                  */
1908                 free_xid(xid);
1909                 return -EOPNOTSUPP;
1910         }
1911
1912         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1913                         xid);
1914         free_xid(xid);
1915         return rc;
}
1919
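/*
 * fcntl(2) byte-range lock entry point: F_GETLK is answered via
 * cifs_getlk(), everything else is applied via cifs_setlk().
 */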
1920 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1921 {
1922         int rc, xid;
1923         int lock = 0, unlock = 0;
1924         bool wait_flag = false;
1925         bool posix_lck = false;
1926         struct cifs_sb_info *cifs_sb;
1927         struct cifs_tcon *tcon;
1928         struct cifsFileInfo *cfile;
1929         __u32 type;
1930
1931         rc = -EACCES;
1932         xid = get_xid();
1933
	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
		 flock->fl_type, flock->fl_flags, (long long)flock->fl_start,
		 (long long)flock->fl_end);
1937
1938         cfile = (struct cifsFileInfo *)file->private_data;
1939         tcon = tlink_tcon(cfile->tlink);
1940
1941         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1942                         tcon->ses->server);
1943         cifs_sb = CIFS_FILE_SB(file);
1944         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1945
1946         if (cap_unix(tcon->ses) &&
1947             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1948             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1949                 posix_lck = true;
1950         /*
1951          * BB add code here to normalize offset and length to account for
1952          * negative length which we can not accept over the wire.
1953          */
1954         if (IS_GETLK(cmd)) {
1955                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1956                 free_xid(xid);
1957                 return rc;
1958         }
1959
1960         if (!lock && !unlock) {
1961                 /*
1962                  * if no lock or unlock then nothing to do since we do not
1963                  * know what it is
1964                  */
1965                 free_xid(xid);
1966                 return -EOPNOTSUPP;
1967         }
1968
1969         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1970                         xid);
1971         free_xid(xid);
1972         return rc;
1973 }
1974
1975 /*
1976  * update the file size (if needed) after a write. Should be called with
1977  * the inode->i_lock held
1978  */
1979 void
1980 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1981                       unsigned int bytes_written)
1982 {
1983         loff_t end_of_write = offset + bytes_written;
1984
1985         if (end_of_write > cifsi->server_eof)
1986                 cifsi->server_eof = end_of_write;
1987 }
1988
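/*
 * Synchronously write @write_size bytes at *@offset with the server's
 * sync_write op, reopening an invalidated handle and retrying on -EAGAIN.
 * The cached EOF and i_size are updated as data goes out, and the number
 * of bytes written is returned (or rc if nothing was written).
 * Illustrative call, mirroring cifs_partialpagewrite() below:
 *
 *	bytes_written = cifs_write(open_file, open_file->pid, write_data,
 *				   to - from, &offset);
 */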
1989 static ssize_t
1990 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1991            size_t write_size, loff_t *offset)
1992 {
1993         int rc = 0;
1994         unsigned int bytes_written = 0;
1995         unsigned int total_written;
1996         struct cifs_tcon *tcon;
1997         struct TCP_Server_Info *server;
1998         unsigned int xid;
1999         struct dentry *dentry = open_file->dentry;
2000         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2001         struct cifs_io_parms io_parms = {0};
2002
2003         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2004                  write_size, *offset, dentry);
2005
2006         tcon = tlink_tcon(open_file->tlink);
2007         server = tcon->ses->server;
2008
2009         if (!server->ops->sync_write)
2010                 return -ENOSYS;
2011
2012         xid = get_xid();
2013
2014         for (total_written = 0; write_size > total_written;
2015              total_written += bytes_written) {
2016                 rc = -EAGAIN;
2017                 while (rc == -EAGAIN) {
2018                         struct kvec iov[2];
2019                         unsigned int len;
2020
2021                         if (open_file->invalidHandle) {
2022                                 /* we could deadlock if we called
2023                                    filemap_fdatawait from here so tell
2024                                    reopen_file not to flush data to
2025                                    server now */
2026                                 rc = cifs_reopen_file(open_file, false);
2027                                 if (rc != 0)
2028                                         break;
2029                         }
2030
2031                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2032                                   (unsigned int)write_size - total_written);
2033                         /* iov[0] is reserved for smb header */
2034                         iov[1].iov_base = (char *)write_data + total_written;
2035                         iov[1].iov_len = len;
2036                         io_parms.pid = pid;
2037                         io_parms.tcon = tcon;
2038                         io_parms.offset = *offset;
2039                         io_parms.length = len;
2040                         rc = server->ops->sync_write(xid, &open_file->fid,
2041                                         &io_parms, &bytes_written, iov, 1);
2042                 }
2043                 if (rc || (bytes_written == 0)) {
2044                         if (total_written)
2045                                 break;
2046                         else {
2047                                 free_xid(xid);
2048                                 return rc;
2049                         }
2050                 } else {
2051                         spin_lock(&d_inode(dentry)->i_lock);
2052                         cifs_update_eof(cifsi, *offset, bytes_written);
2053                         spin_unlock(&d_inode(dentry)->i_lock);
2054                         *offset += bytes_written;
2055                 }
2056         }
2057
2058         cifs_stats_bytes_written(tcon, total_written);
2059
2060         if (total_written > 0) {
2061                 spin_lock(&d_inode(dentry)->i_lock);
2062                 if (*offset > d_inode(dentry)->i_size) {
2063                         i_size_write(d_inode(dentry), *offset);
2064                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2065                 }
2066                 spin_unlock(&d_inode(dentry)->i_lock);
2067         }
2068         mark_inode_dirty_sync(d_inode(dentry));
2069         free_xid(xid);
2070         return total_written;
2071 }
2072
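/*
 * Find a valid, readable open handle for this inode and take a reference
 * on it so it cannot be closed underneath the caller.  On multiuser
 * mounts the search may be restricted to handles opened by the current
 * fsuid.
 */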
2073 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2074                                         bool fsuid_only)
2075 {
2076         struct cifsFileInfo *open_file = NULL;
2077         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2078
2079         /* only filter by fsuid on multiuser mounts */
2080         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2081                 fsuid_only = false;
2082
2083         spin_lock(&cifs_inode->open_file_lock);
	/*
	 * We could simply take the first list entry since write-only entries
	 * are always at the end of the list, but the first entry might have
	 * a close pending, so we go through the whole list.
	 */
2087         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2088                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2089                         continue;
2090                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2091                         if ((!open_file->invalidHandle)) {
2092                                 /* found a good file */
2093                                 /* lock it so it will not be closed on us */
2094                                 cifsFileInfo_get(open_file);
2095                                 spin_unlock(&cifs_inode->open_file_lock);
2096                                 return open_file;
2097                         } /* else might as well continue, and look for
2098                              another, or simply have the caller reopen it
2099                              again rather than trying to fix this handle */
2100                 } else /* write only file */
2101                         break; /* write only files are last so must be done */
2102         }
2103         spin_unlock(&cifs_inode->open_file_lock);
2104         return NULL;
2105 }
2106
/*
 * Return 0 with *ret_file set on success, -EBADF if no writable handle is
 * found, or another rc on error.
 */
2108 int
2109 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2110                        struct cifsFileInfo **ret_file)
2111 {
2112         struct cifsFileInfo *open_file, *inv_file = NULL;
2113         struct cifs_sb_info *cifs_sb;
2114         bool any_available = false;
2115         int rc = -EBADF;
2116         unsigned int refind = 0;
2117         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2118         bool with_delete = flags & FIND_WR_WITH_DELETE;
2119         *ret_file = NULL;
2120
2121         /*
2122          * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen, but we had reports of an oops
	 * (due to it being zero) during stress testcases, so we need to
	 * check for it.
2125          */
2126
2127         if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2129                 dump_stack();
2130                 return rc;
2131         }
2132
2133         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2134
2135         /* only filter by fsuid on multiuser mounts */
2136         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2137                 fsuid_only = false;
2138
2139         spin_lock(&cifs_inode->open_file_lock);
2140 refind_writable:
2141         if (refind > MAX_REOPEN_ATT) {
2142                 spin_unlock(&cifs_inode->open_file_lock);
2143                 return rc;
2144         }
2145         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2146                 if (!any_available && open_file->pid != current->tgid)
2147                         continue;
2148                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2149                         continue;
2150                 if (with_delete && !(open_file->fid.access & DELETE))
2151                         continue;
2152                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2153                         if (!open_file->invalidHandle) {
2154                                 /* found a good writable file */
2155                                 cifsFileInfo_get(open_file);
2156                                 spin_unlock(&cifs_inode->open_file_lock);
2157                                 *ret_file = open_file;
2158                                 return 0;
2159                         } else {
2160                                 if (!inv_file)
2161                                         inv_file = open_file;
2162                         }
2163                 }
2164         }
	/* couldn't find a usable FH with the same pid, try any available */
2166         if (!any_available) {
2167                 any_available = true;
2168                 goto refind_writable;
2169         }
2170
2171         if (inv_file) {
2172                 any_available = false;
2173                 cifsFileInfo_get(inv_file);
2174         }
2175
2176         spin_unlock(&cifs_inode->open_file_lock);
2177
2178         if (inv_file) {
2179                 rc = cifs_reopen_file(inv_file, false);
2180                 if (!rc) {
2181                         *ret_file = inv_file;
2182                         return 0;
2183                 }
2184
2185                 spin_lock(&cifs_inode->open_file_lock);
2186                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2187                 spin_unlock(&cifs_inode->open_file_lock);
2188                 cifsFileInfo_put(inv_file);
2189                 ++refind;
2190                 inv_file = NULL;
2191                 spin_lock(&cifs_inode->open_file_lock);
2192                 goto refind_writable;
2193         }
2194
2195         return rc;
2196 }
2197
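/*
 * As cifs_get_writable_file(), but returns the handle directly (NULL on
 * failure) and just logs the rc.  Illustrative use:
 *
 *	cfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
 *	if (cfile) {
 *		... write through cfile ...
 *		cifsFileInfo_put(cfile);
 *	}
 */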
2198 struct cifsFileInfo *
2199 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2200 {
2201         struct cifsFileInfo *cfile;
2202         int rc;
2203
2204         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2205         if (rc)
2206                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2207
2208         return cfile;
2209 }
2210
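/*
 * Find an open, writable handle by path name: walk the tcon's open-file
 * list, match the dentry's rebuilt full path against @name, then defer to
 * cifs_get_writable_file() for the matching inode.
 */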
2211 int
2212 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2213                        int flags,
2214                        struct cifsFileInfo **ret_file)
2215 {
2216         struct cifsFileInfo *cfile;
2217         void *page = alloc_dentry_path();
2218
2219         *ret_file = NULL;
2220
2221         spin_lock(&tcon->open_file_lock);
2222         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2223                 struct cifsInodeInfo *cinode;
2224                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2225                 if (IS_ERR(full_path)) {
2226                         spin_unlock(&tcon->open_file_lock);
2227                         free_dentry_path(page);
2228                         return PTR_ERR(full_path);
2229                 }
2230                 if (strcmp(full_path, name))
2231                         continue;
2232
2233                 cinode = CIFS_I(d_inode(cfile->dentry));
2234                 spin_unlock(&tcon->open_file_lock);
2235                 free_dentry_path(page);
2236                 return cifs_get_writable_file(cinode, flags, ret_file);
2237         }
2238
2239         spin_unlock(&tcon->open_file_lock);
2240         free_dentry_path(page);
2241         return -ENOENT;
2242 }
2243
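/* As cifs_get_writable_path(), but for a readable handle. */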
2244 int
2245 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2246                        struct cifsFileInfo **ret_file)
2247 {
2248         struct cifsFileInfo *cfile;
2249         void *page = alloc_dentry_path();
2250
2251         *ret_file = NULL;
2252
2253         spin_lock(&tcon->open_file_lock);
2254         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2255                 struct cifsInodeInfo *cinode;
2256                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2257                 if (IS_ERR(full_path)) {
2258                         spin_unlock(&tcon->open_file_lock);
2259                         free_dentry_path(page);
2260                         return PTR_ERR(full_path);
2261                 }
2262                 if (strcmp(full_path, name))
2263                         continue;
2264
2265                 cinode = CIFS_I(d_inode(cfile->dentry));
2266                 spin_unlock(&tcon->open_file_lock);
2267                 free_dentry_path(page);
2268                 *ret_file = find_readable_file(cinode, 0);
2269                 return *ret_file ? 0 : -ENOENT;
2270         }
2271
2272         spin_unlock(&tcon->open_file_lock);
2273         free_dentry_path(page);
2274         return -ENOENT;
2275 }
2276
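/*
 * Final kref release for a cifs_writedata: deregister any smbdirect
 * memory registration, drop the file handle reference and free the page
 * array and the structure itself.
 */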
2277 void
2278 cifs_writedata_release(struct kref *refcount)
2279 {
2280         struct cifs_writedata *wdata = container_of(refcount,
2281                                         struct cifs_writedata, refcount);
2282 #ifdef CONFIG_CIFS_SMB_DIRECT
2283         if (wdata->mr) {
2284                 smbd_deregister_mr(wdata->mr);
2285                 wdata->mr = NULL;
2286         }
2287 #endif
2288
2289         if (wdata->cfile)
2290                 cifsFileInfo_put(wdata->cfile);
2291
2292         kvfree(wdata->pages);
2293         kfree(wdata);
2294 }
2295
2296 /*
2297  * Write failed with a retryable error. Resend the write request. It's also
2298  * possible that the page was redirtied so re-clean the page.
2299  */
2300 static void
2301 cifs_writev_requeue(struct cifs_writedata *wdata)
2302 {
2303         int i, rc = 0;
2304         struct inode *inode = d_inode(wdata->cfile->dentry);
2305         struct TCP_Server_Info *server;
2306         unsigned int rest_len;
2307
2308         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2309         i = 0;
2310         rest_len = wdata->bytes;
2311         do {
2312                 struct cifs_writedata *wdata2;
2313                 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2314
2315                 wsize = server->ops->wp_retry_size(inode);
2316                 if (wsize < rest_len) {
2317                         nr_pages = wsize / PAGE_SIZE;
2318                         if (!nr_pages) {
2319                                 rc = -EOPNOTSUPP;
2320                                 break;
2321                         }
2322                         cur_len = nr_pages * PAGE_SIZE;
2323                         tailsz = PAGE_SIZE;
2324                 } else {
2325                         nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2326                         cur_len = rest_len;
2327                         tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2328                 }
2329
2330                 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2331                 if (!wdata2) {
2332                         rc = -ENOMEM;
2333                         break;
2334                 }
2335
2336                 for (j = 0; j < nr_pages; j++) {
2337                         wdata2->pages[j] = wdata->pages[i + j];
2338                         lock_page(wdata2->pages[j]);
2339                         clear_page_dirty_for_io(wdata2->pages[j]);
2340                 }
2341
2342                 wdata2->sync_mode = wdata->sync_mode;
2343                 wdata2->nr_pages = nr_pages;
2344                 wdata2->offset = page_offset(wdata2->pages[0]);
2345                 wdata2->pagesz = PAGE_SIZE;
2346                 wdata2->tailsz = tailsz;
2347                 wdata2->bytes = cur_len;
2348
2349                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2350                                             &wdata2->cfile);
2351                 if (!wdata2->cfile) {
2352                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2353                                  rc);
2354                         if (!is_retryable_error(rc))
2355                                 rc = -EBADF;
2356                 } else {
2357                         wdata2->pid = wdata2->cfile->pid;
2358                         rc = server->ops->async_writev(wdata2,
2359                                                        cifs_writedata_release);
2360                 }
2361
2362                 for (j = 0; j < nr_pages; j++) {
2363                         unlock_page(wdata2->pages[j]);
2364                         if (rc != 0 && !is_retryable_error(rc)) {
2365                                 SetPageError(wdata2->pages[j]);
2366                                 end_page_writeback(wdata2->pages[j]);
2367                                 put_page(wdata2->pages[j]);
2368                         }
2369                 }
2370
2371                 kref_put(&wdata2->refcount, cifs_writedata_release);
2372                 if (rc) {
2373                         if (is_retryable_error(rc))
2374                                 continue;
2375                         i += nr_pages;
2376                         break;
2377                 }
2378
2379                 rest_len -= cur_len;
2380                 i += nr_pages;
2381         } while (i < wdata->nr_pages);
2382
2383         /* cleanup remaining pages from the original wdata */
2384         for (; i < wdata->nr_pages; i++) {
2385                 SetPageError(wdata->pages[i]);
2386                 end_page_writeback(wdata->pages[i]);
2387                 put_page(wdata->pages[i]);
2388         }
2389
2390         if (rc != 0 && !is_retryable_error(rc))
2391                 mapping_set_error(inode->i_mapping, rc);
2392         kref_put(&wdata->refcount, cifs_writedata_release);
2393 }
2394
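/*
 * Work item run when an async write completes: on success update the
 * cached EOF and write statistics; on -EAGAIN under WB_SYNC_ALL requeue
 * the whole write.  Otherwise the pages are redirtied (-EAGAIN) or marked
 * with an error before writeback is ended and the references dropped.
 */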
2395 void
2396 cifs_writev_complete(struct work_struct *work)
2397 {
2398         struct cifs_writedata *wdata = container_of(work,
2399                                                 struct cifs_writedata, work);
2400         struct inode *inode = d_inode(wdata->cfile->dentry);
2401         int i = 0;
2402
2403         if (wdata->result == 0) {
2404                 spin_lock(&inode->i_lock);
2405                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2406                 spin_unlock(&inode->i_lock);
2407                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2408                                          wdata->bytes);
2409         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2410                 return cifs_writev_requeue(wdata);
2411
2412         for (i = 0; i < wdata->nr_pages; i++) {
2413                 struct page *page = wdata->pages[i];
2414
2415                 if (wdata->result == -EAGAIN)
2416                         __set_page_dirty_nobuffers(page);
2417                 else if (wdata->result < 0)
2418                         SetPageError(page);
2419                 end_page_writeback(page);
2420                 cifs_readpage_to_fscache(inode, page);
2421                 put_page(page);
2422         }
2423         if (wdata->result != -EAGAIN)
2424                 mapping_set_error(inode->i_mapping, wdata->result);
2425         kref_put(&wdata->refcount, cifs_writedata_release);
2426 }
2427
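/*
 * Allocate a cifs_writedata with room for @nr_pages page pointers and the
 * given completion work function.
 */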
2428 struct cifs_writedata *
2429 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2430 {
2431         struct page **pages =
2432                 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2433         if (pages)
2434                 return cifs_writedata_direct_alloc(pages, complete);
2435
2436         return NULL;
2437 }
2438
2439 struct cifs_writedata *
2440 cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2441 {
2442         struct cifs_writedata *wdata;
2443
2444         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2445         if (wdata != NULL) {
2446                 wdata->pages = pages;
2447                 kref_init(&wdata->refcount);
2448                 INIT_LIST_HEAD(&wdata->list);
2449                 init_completion(&wdata->done);
2450                 INIT_WORK(&wdata->work, complete);
2451         }
2452         return wdata;
2453 }
2454
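/*
 * Write the [from, to) part of a locked, mapped page back to the server
 * through a borrowed writable handle, clamping the range so the file is
 * never extended.
 */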
2456 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2457 {
2458         struct address_space *mapping = page->mapping;
2459         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2460         char *write_data;
2461         int rc = -EFAULT;
2462         int bytes_written = 0;
2463         struct inode *inode;
2464         struct cifsFileInfo *open_file;
2465
2466         if (!mapping || !mapping->host)
2467                 return -EFAULT;
2468
2469         inode = page->mapping->host;
2470
2471         offset += (loff_t)from;
2472         write_data = kmap(page);
2473         write_data += from;
2474
2475         if ((to > PAGE_SIZE) || (from > to)) {
2476                 kunmap(page);
2477                 return -EIO;
2478         }
2479
2480         /* racing with truncate? */
2481         if (offset > mapping->host->i_size) {
2482                 kunmap(page);
2483                 return 0; /* don't care */
2484         }
2485
2486         /* check to make sure that we are not extending the file */
2487         if (mapping->host->i_size - offset < (loff_t)to)
2488                 to = (unsigned)(mapping->host->i_size - offset);
2489
2490         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2491                                     &open_file);
2492         if (!rc) {
2493                 bytes_written = cifs_write(open_file, open_file->pid,
2494                                            write_data, to - from, &offset);
2495                 cifsFileInfo_put(open_file);
2496                 /* Does mm or vfs already set times? */
2497                 inode->i_atime = inode->i_mtime = current_time(inode);
2498                 if ((bytes_written > 0) && (offset))
2499                         rc = 0;
2500                 else if (bytes_written < 0)
2501                         rc = bytes_written;
2502                 else
2503                         rc = -EFAULT;
2504         } else {
2505                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2506                 if (!is_retryable_error(rc))
2507                         rc = -EIO;
2508         }
2509
2510         kunmap(page);
2511         return rc;
2512 }
2513
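/*
 * Allocate a writedata and fill its page array with up to @tofind dirty
 * pages from the mapping, starting at *@index.
 */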
2514 static struct cifs_writedata *
2515 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2516                           pgoff_t end, pgoff_t *index,
2517                           unsigned int *found_pages)
2518 {
2519         struct cifs_writedata *wdata;
2520
2521         wdata = cifs_writedata_alloc((unsigned int)tofind,
2522                                      cifs_writev_complete);
2523         if (!wdata)
2524                 return NULL;
2525
2526         *found_pages = find_get_pages_range_tag(mapping, index, end,
2527                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2528         return wdata;
2529 }
2530
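/*
 * Lock and claim a run of consecutive dirty pages (from those found by
 * wdata_alloc_and_fillpages()) for writeback, stopping at the first page
 * that is non-consecutive, past @end, still under writeback or beyond
 * EOF; references on the surplus pages are dropped.
 */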
2531 static unsigned int
2532 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2533                     struct address_space *mapping,
2534                     struct writeback_control *wbc,
2535                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2536 {
2537         unsigned int nr_pages = 0, i;
2538         struct page *page;
2539
2540         for (i = 0; i < found_pages; i++) {
2541                 page = wdata->pages[i];
2542                 /*
2543                  * At this point we hold neither the i_pages lock nor the
2544                  * page lock: the page may be truncated or invalidated
2545                  * (changing page->mapping to NULL), or even swizzled
2546                  * back from swapper_space to tmpfs file mapping
2547                  */
2548
2549                 if (nr_pages == 0)
2550                         lock_page(page);
2551                 else if (!trylock_page(page))
2552                         break;
2553
2554                 if (unlikely(page->mapping != mapping)) {
2555                         unlock_page(page);
2556                         break;
2557                 }
2558
2559                 if (!wbc->range_cyclic && page->index > end) {
2560                         *done = true;
2561                         unlock_page(page);
2562                         break;
2563                 }
2564
2565                 if (*next && (page->index != *next)) {
2566                         /* Not next consecutive page */
2567                         unlock_page(page);
2568                         break;
2569                 }
2570
2571                 if (wbc->sync_mode != WB_SYNC_NONE)
2572                         wait_on_page_writeback(page);
2573
2574                 if (PageWriteback(page) ||
2575                                 !clear_page_dirty_for_io(page)) {
2576                         unlock_page(page);
2577                         break;
2578                 }
2579
2580                 /*
2581                  * This actually clears the dirty bit in the radix tree.
2582                  * See cifs_writepage() for more commentary.
2583                  */
2584                 set_page_writeback(page);
2585                 if (page_offset(page) >= i_size_read(mapping->host)) {
2586                         *done = true;
2587                         unlock_page(page);
2588                         end_page_writeback(page);
2589                         break;
2590                 }
2591
2592                 wdata->pages[i] = page;
2593                 *next = page->index + 1;
2594                 ++nr_pages;
2595         }
2596
2597         /* reset index to refind any pages skipped */
2598         if (nr_pages == 0)
2599                 *index = wdata->pages[0]->index + 1;
2600
2601         /* put any pages we aren't going to use */
2602         for (i = nr_pages; i < found_pages; i++) {
2603                 put_page(wdata->pages[i]);
2604                 wdata->pages[i] = NULL;
2605         }
2606
2607         return nr_pages;
2608 }
2609
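/*
 * Fill in the remaining writedata fields for @nr_pages pages and, after
 * adjusting the credits and checking that the handle is still valid, hand
 * the request to the server's async_writev op.
 */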
2610 static int
2611 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2612                  struct address_space *mapping, struct writeback_control *wbc)
2613 {
2614         int rc;
2615
2616         wdata->sync_mode = wbc->sync_mode;
2617         wdata->nr_pages = nr_pages;
2618         wdata->offset = page_offset(wdata->pages[0]);
2619         wdata->pagesz = PAGE_SIZE;
2620         wdata->tailsz = min(i_size_read(mapping->host) -
2621                         page_offset(wdata->pages[nr_pages - 1]),
2622                         (loff_t)PAGE_SIZE);
2623         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2624         wdata->pid = wdata->cfile->pid;
2625
2626         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2627         if (rc)
2628                 return rc;
2629
2630         if (wdata->cfile->invalidHandle)
2631                 rc = -EAGAIN;
2632         else
2633                 rc = wdata->server->ops->async_writev(wdata,
2634                                                       cifs_writedata_release);
2635
2636         return rc;
2637 }
2638
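/*
 * ->writepages: gather runs of dirty pages, bounded by the negotiated
 * wsize and the available credits, and submit them as async writes.
 * Ranges failing with -EAGAIN under WB_SYNC_ALL are retried, and cyclic
 * writeback wraps around to the start of the file once.
 */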
2639 static int cifs_writepages(struct address_space *mapping,
2640                            struct writeback_control *wbc)
2641 {
2642         struct inode *inode = mapping->host;
2643         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2644         struct TCP_Server_Info *server;
2645         bool done = false, scanned = false, range_whole = false;
2646         pgoff_t end, index;
2647         struct cifs_writedata *wdata;
2648         struct cifsFileInfo *cfile = NULL;
2649         int rc = 0;
2650         int saved_rc = 0;
2651         unsigned int xid;
2652
2653         /*
2654          * If wsize is smaller than the page cache size, default to writing
2655          * one page at a time via cifs_writepage
2656          */
2657         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2658                 return generic_writepages(mapping, wbc);
2659
2660         xid = get_xid();
2661         if (wbc->range_cyclic) {
2662                 index = mapping->writeback_index; /* Start from prev offset */
2663                 end = -1;
2664         } else {
2665                 index = wbc->range_start >> PAGE_SHIFT;
2666                 end = wbc->range_end >> PAGE_SHIFT;
2667                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2668                         range_whole = true;
2669                 scanned = true;
2670         }
2671         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2672
2673 retry:
2674         while (!done && index <= end) {
2675                 unsigned int i, nr_pages, found_pages, wsize;
2676                 pgoff_t next = 0, tofind, saved_index = index;
2677                 struct cifs_credits credits_on_stack;
2678                 struct cifs_credits *credits = &credits_on_stack;
2679                 int get_file_rc = 0;
2680
2681                 if (cfile)
2682                         cifsFileInfo_put(cfile);
2683
2684                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2685
2686                 /* in case of an error store it to return later */
2687                 if (rc)
2688                         get_file_rc = rc;
2689
2690                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2691                                                    &wsize, credits);
2692                 if (rc != 0) {
2693                         done = true;
2694                         break;
2695                 }
2696
2697                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2698
2699                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2700                                                   &found_pages);
2701                 if (!wdata) {
2702                         rc = -ENOMEM;
2703                         done = true;
2704                         add_credits_and_wake_if(server, credits, 0);
2705                         break;
2706                 }
2707
2708                 if (found_pages == 0) {
2709                         kref_put(&wdata->refcount, cifs_writedata_release);
2710                         add_credits_and_wake_if(server, credits, 0);
2711                         break;
2712                 }
2713
2714                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2715                                                end, &index, &next, &done);
2716
2717                 /* nothing to write? */
2718                 if (nr_pages == 0) {
2719                         kref_put(&wdata->refcount, cifs_writedata_release);
2720                         add_credits_and_wake_if(server, credits, 0);
2721                         continue;
2722                 }
2723
2724                 wdata->credits = credits_on_stack;
2725                 wdata->cfile = cfile;
2726                 wdata->server = server;
2727                 cfile = NULL;
2728
2729                 if (!wdata->cfile) {
2730                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2731                                  get_file_rc);
2732                         if (is_retryable_error(get_file_rc))
2733                                 rc = get_file_rc;
2734                         else
2735                                 rc = -EBADF;
2736                 } else
2737                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2738
2739                 for (i = 0; i < nr_pages; ++i)
2740                         unlock_page(wdata->pages[i]);
2741
2742                 /* send failure -- clean up the mess */
2743                 if (rc != 0) {
2744                         add_credits_and_wake_if(server, &wdata->credits, 0);
2745                         for (i = 0; i < nr_pages; ++i) {
2746                                 if (is_retryable_error(rc))
2747                                         redirty_page_for_writepage(wbc,
2748                                                            wdata->pages[i]);
2749                                 else
2750                                         SetPageError(wdata->pages[i]);
2751                                 end_page_writeback(wdata->pages[i]);
2752                                 put_page(wdata->pages[i]);
2753                         }
2754                         if (!is_retryable_error(rc))
2755                                 mapping_set_error(mapping, rc);
2756                 }
2757                 kref_put(&wdata->refcount, cifs_writedata_release);
2758
2759                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2760                         index = saved_index;
2761                         continue;
2762                 }
2763
2764                 /* Return immediately if we received a signal during writing */
2765                 if (is_interrupt_error(rc)) {
2766                         done = true;
2767                         break;
2768                 }
2769
2770                 if (rc != 0 && saved_rc == 0)
2771                         saved_rc = rc;
2772
2773                 wbc->nr_to_write -= nr_pages;
2774                 if (wbc->nr_to_write <= 0)
2775                         done = true;
2776
2777                 index = next;
2778         }
2779
2780         if (!scanned && !done) {
2781                 /*
2782                  * We hit the last page and there is more work to be done: wrap
2783                  * back to the start of the file
2784                  */
2785                 scanned = true;
2786                 index = 0;
2787                 goto retry;
2788         }
2789
2790         if (saved_rc != 0)
2791                 rc = saved_rc;
2792
2793         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2794                 mapping->writeback_index = index;
2795
2796         if (cfile)
2797                 cifsFileInfo_put(cfile);
2798         free_xid(xid);
2799         /* Indicate that ctime and mtime need updating, as close is deferred */
2800         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2801         return rc;
2802 }
2803
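/*
 * Write a single locked page to the server via cifs_partialpagewrite().
 * -EAGAIN is retried in place only for WB_SYNC_ALL writeback; other
 * retryable errors redirty the page for a later writeback pass.
 */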
2804 static int
2805 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2806 {
2807         int rc;
2808         unsigned int xid;
2809
2810         xid = get_xid();
2811 /* BB add check for wbc flags */
2812         get_page(page);
2813         if (!PageUptodate(page))
2814                 cifs_dbg(FYI, "ppw - page not up to date\n");
2815
2816         /*
2817          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2818          *
2819          * A writepage() implementation always needs to do either this,
2820          * or re-dirty the page with "redirty_page_for_writepage()" in
2821          * the case of a failure.
2822          *
2823          * Just unlocking the page would leave the radix tree tag-bits
2824          * out of sync with the true state of the page.
2825          */
2826         set_page_writeback(page);
2827 retry_write:
2828         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2829         if (is_retryable_error(rc)) {
2830                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2831                         goto retry_write;
2832                 redirty_page_for_writepage(wbc, page);
2833         } else if (rc != 0) {
2834                 SetPageError(page);
2835                 mapping_set_error(page->mapping, rc);
2836         } else {
2837                 SetPageUptodate(page);
2838         }
2839         end_page_writeback(page);
2840         put_page(page);
2841         free_xid(xid);
2842         return rc;
2843 }
2844
2845 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2846 {
2847         int rc = cifs_writepage_locked(page, wbc);
2848         unlock_page(page);
2849         return rc;
2850 }
2851
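/*
 * Called by the generic write path once data has been copied into the
 * pagecache page.  If the page is not uptodate, the copied range is
 * pushed to the server immediately via cifs_write(); otherwise the
 * page is simply marked dirty for later writeback.
 */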
2852 static int cifs_write_end(struct file *file, struct address_space *mapping,
2853                         loff_t pos, unsigned len, unsigned copied,
2854                         struct page *page, void *fsdata)
2855 {
2856         int rc;
2857         struct inode *inode = mapping->host;
2858         struct cifsFileInfo *cfile = file->private_data;
2859         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2860         __u32 pid;
2861
2862         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2863                 pid = cfile->pid;
2864         else
2865                 pid = current->tgid;
2866
2867         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2868                  page, pos, copied);
2869
2870         if (PageChecked(page)) {
2871                 if (copied == len)
2872                         SetPageUptodate(page);
2873                 ClearPageChecked(page);
2874         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2875                 SetPageUptodate(page);
2876
2877         if (!PageUptodate(page)) {
2878                 char *page_data;
2879                 unsigned offset = pos & (PAGE_SIZE - 1);
2880                 unsigned int xid;
2881
2882                 xid = get_xid();
2883                 /* this is probably better than directly calling
2884                    partialpage_write, since here the file handle is
2885                    known, and we might as well leverage it */
2886                 /* BB check if anything else is missing out of ppw,
2887                    such as updating the last write time */
2888                 page_data = kmap(page);
2889                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2890                 /* if (rc < 0) should we set writebehind rc? */
2891                 kunmap(page);
2892
2893                 free_xid(xid);
2894         } else {
2895                 rc = copied;
2896                 pos += copied;
2897                 set_page_dirty(page);
2898         }
2899
2900         if (rc > 0) {
2901                 spin_lock(&inode->i_lock);
2902                 if (pos > inode->i_size) {
2903                         i_size_write(inode, pos);
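                        /* i_blocks counts 512-byte sectors, rounded up */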
2904                         inode->i_blocks = (512 - 1 + pos) >> 9;
2905                 }
2906                 spin_unlock(&inode->i_lock);
2907         }
2908
2909         unlock_page(page);
2910         put_page(page);
2911         /* Indicate that ctime and mtime need updating, as close is deferred */
2912         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2913
2914         return rc;
2915 }
2916
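/*
 * Strict-cache fsync: write out and wait on dirty pages, zap the page
 * cache if we hold no read lease/oplock (cached data may be stale),
 * then ask the server to flush the open handle.
 */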
2917 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2918                       int datasync)
2919 {
2920         unsigned int xid;
2921         int rc = 0;
2922         struct cifs_tcon *tcon;
2923         struct TCP_Server_Info *server;
2924         struct cifsFileInfo *smbfile = file->private_data;
2925         struct inode *inode = file_inode(file);
2926         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2927
2928         rc = file_write_and_wait_range(file, start, end);
2929         if (rc) {
2930                 trace_cifs_fsync_err(inode->i_ino, rc);
2931                 return rc;
2932         }
2933
2934         xid = get_xid();
2935
2936         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2937                  file, datasync);
2938
2939         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2940                 rc = cifs_zap_mapping(inode);
2941                 if (rc) {
2942                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2943                         rc = 0; /* don't care about it in fsync */
2944                 }
2945         }
2946
2947         tcon = tlink_tcon(smbfile->tlink);
2948         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2949                 server = tcon->ses->server;
2950                 if (server->ops->flush == NULL) {
2951                         rc = -ENOSYS;
2952                         goto strict_fsync_exit;
2953                 }
2954
2955                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2956                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2957                         if (smbfile) {
2958                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2959                                 cifsFileInfo_put(smbfile);
2960                         } else
2961                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2962                 } else
2963                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2964         }
2965
2966 strict_fsync_exit:
2967         free_xid(xid);
2968         return rc;
2969 }
2970
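/*
 * Non-strict fsync: same as cifs_strict_fsync() except that the page
 * cache is never invalidated before the server flush.
 */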
2971 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2972 {
2973         unsigned int xid;
2974         int rc = 0;
2975         struct cifs_tcon *tcon;
2976         struct TCP_Server_Info *server;
2977         struct cifsFileInfo *smbfile = file->private_data;
2978         struct inode *inode = file_inode(file);
2979         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2980
2981         rc = file_write_and_wait_range(file, start, end);
2982         if (rc) {
2983                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2984                 return rc;
2985         }
2986
2987         xid = get_xid();
2988
2989         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2990                  file, datasync);
2991
2992         tcon = tlink_tcon(smbfile->tlink);
2993         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2994                 server = tcon->ses->server;
2995                 if (server->ops->flush == NULL) {
2996                         rc = -ENOSYS;
2997                         goto fsync_exit;
2998                 }
2999
3000                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3001                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3002                         if (smbfile) {
3003                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3004                                 cifsFileInfo_put(smbfile);
3005                         } else
3006                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3007                 } else
3008                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3009         }
3010
3011 fsync_exit:
3012         free_xid(xid);
3013         return rc;
3014 }
3015
3016 /*
3017  * As the file closes, flush all cached write data for this inode,
3018  * checking for write-behind errors.
3019  */
3020 int cifs_flush(struct file *file, fl_owner_t id)
3021 {
3022         struct inode *inode = file_inode(file);
3023         int rc = 0;
3024
3025         if (file->f_mode & FMODE_WRITE)
3026                 rc = filemap_write_and_wait(inode->i_mapping);
3027
3028         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3029         if (rc) {
3030                 /* get more nuanced writeback errors */
3031                 rc = filemap_check_wb_err(file->f_mapping, 0);
3032                 trace_cifs_flush_err(inode->i_ino, rc);
3033         }
3034         return rc;
3035 }
3036
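/*
 * Allocate num_pages pages into pages[].  On failure every page
 * allocated so far is released and -ENOMEM is returned, so the caller
 * never sees a partially filled array.
 */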
3037 static int
3038 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3039 {
3040         int rc = 0;
3041         unsigned long i;
3042
3043         for (i = 0; i < num_pages; i++) {
3044                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3045                 if (!pages[i]) {
3046                         /*
3047                          * save the number of pages we have already allocated
3048                          * and return with an ENOMEM error
3049                          */
3050                         num_pages = i;
3051                         rc = -ENOMEM;
3052                         break;
3053                 }
3054         }
3055
3056         if (rc) {
3057                 for (i = 0; i < num_pages; i++)
3058                         put_page(pages[i]);
3059         }
3060         return rc;
3061 }
3062
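/*
 * Cap a request at the negotiated wsize and compute how many pages it
 * spans.  Illustrative example (not taken from the code): with a 64KiB
 * wsize, len = 100KiB and 4KiB pages, *cur_len becomes 64KiB and 16 is
 * returned.
 */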
3063 static inline
3064 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3065 {
3066         size_t num_pages;
3067         size_t clen;
3068
3069         clen = min_t(const size_t, len, wsize);
3070         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3071
3072         if (cur_len)
3073                 *cur_len = clen;
3074
3075         return num_pages;
3076 }
3077
3078 static void
3079 cifs_uncached_writedata_release(struct kref *refcount)
3080 {
3081         int i;
3082         struct cifs_writedata *wdata = container_of(refcount,
3083                                         struct cifs_writedata, refcount);
3084
3085         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3086         for (i = 0; i < wdata->nr_pages; i++)
3087                 put_page(wdata->pages[i]);
3088         cifs_writedata_release(refcount);
3089 }
3090
3091 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3092
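/*
 * Work handler run when an uncached/direct write finishes: advance the
 * cached server EOF and i_size if the write extended the file, wake
 * any waiter, then let the collector reap this wdata.
 */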
3093 static void
3094 cifs_uncached_writev_complete(struct work_struct *work)
3095 {
3096         struct cifs_writedata *wdata = container_of(work,
3097                                         struct cifs_writedata, work);
3098         struct inode *inode = d_inode(wdata->cfile->dentry);
3099         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3100
3101         spin_lock(&inode->i_lock);
3102         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3103         if (cifsi->server_eof > inode->i_size)
3104                 i_size_write(inode, cifsi->server_eof);
3105         spin_unlock(&inode->i_lock);
3106
3107         complete(&wdata->done);
3108         collect_uncached_write_data(wdata->ctx);
3109         /* the call below may free the last ref to the aio ctx */
3110         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3111 }
3112
3113 static int
3114 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3115                       size_t *len, unsigned long *num_pages)
3116 {
3117         size_t save_len, copied, bytes, cur_len = *len;
3118         unsigned long i, nr_pages = *num_pages;
3119
3120         save_len = cur_len;
3121         for (i = 0; i < nr_pages; i++) {
3122                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3123                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3124                 cur_len -= copied;
3125                 /*
3126                  * If we didn't copy as much as we expected, then that
3127                  * may mean we trod into an unmapped area. Stop copying
3128                  * at that point. On the next pass through the big
3129                  * loop, we'll likely end up getting a zero-length
3130                  * write and bailing out of it.
3131                  */
3132                 if (copied < bytes)
3133                         break;
3134         }
3135         cur_len = save_len - cur_len;
3136         *len = cur_len;
3137
3138         /*
3139          * If we have no data to send, then that probably means that
3140          * the copy above failed altogether. That's most likely because
3141          * the address in the iovec was bogus. Return -EFAULT and let
3142          * the caller free anything we allocated and bail out.
3143          */
3144         if (!cur_len)
3145                 return -EFAULT;
3146
3147         /*
3148          * i + 1 now represents the number of pages we actually used in
3149          * the copy phase above.
3150          */
3151         *num_pages = i + 1;
3152         return 0;
3153 }
3154
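/*
 * Retry a failed uncached write: reopen the handle if it went stale,
 * then poll (sleeping 1s per attempt) until the server grants enough
 * credits to resend the whole wdata as a single request.
 */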
3155 static int
3156 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3157         struct cifs_aio_ctx *ctx)
3158 {
3159         unsigned int wsize;
3160         struct cifs_credits credits;
3161         int rc;
3162         struct TCP_Server_Info *server = wdata->server;
3163
3164         do {
3165                 if (wdata->cfile->invalidHandle) {
3166                         rc = cifs_reopen_file(wdata->cfile, false);
3167                         if (rc == -EAGAIN)
3168                                 continue;
3169                         else if (rc)
3170                                 break;
3171                 }
3172
3174                 /*
3175                  * Wait for credits to resend this wdata.
3176                  * Note: we are attempting to resend the whole wdata rather
3177                  * than in segments
3178                  */
3179                 do {
3180                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3181                                                 &wsize, &credits);
3182                         if (rc)
3183                                 goto fail;
3184
3185                         if (wsize < wdata->bytes) {
3186                                 add_credits_and_wake_if(server, &credits, 0);
3187                                 msleep(1000);
3188                         }
3189                 } while (wsize < wdata->bytes);
3190                 wdata->credits = credits;
3191
3192                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3193
3194                 if (!rc) {
3195                         if (wdata->cfile->invalidHandle)
3196                                 rc = -EAGAIN;
3197                         else {
3198 #ifdef CONFIG_CIFS_SMB_DIRECT
3199                                 if (wdata->mr) {
3200                                         wdata->mr->need_invalidate = true;
3201                                         smbd_deregister_mr(wdata->mr);
3202                                         wdata->mr = NULL;
3203                                 }
3204 #endif
3205                                 rc = server->ops->async_writev(wdata,
3206                                         cifs_uncached_writedata_release);
3207                         }
3208                 }
3209
3210                 /* If the write was successfully sent, we are done */
3211                 if (!rc) {
3212                         list_add_tail(&wdata->list, wdata_list);
3213                         return 0;
3214                 }
3215
3216                 /* Roll back credits and retry if needed */
3217                 add_credits_and_wake_if(server, &wdata->credits, 0);
3218         } while (rc == -EAGAIN);
3219
3220 fail:
3221         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3222         return rc;
3223 }
3224
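/*
 * Split the range at @offset/@len of @from into wsize-sized
 * cifs_writedata requests and issue them asynchronously.  For direct
 * I/O the user pages are pinned in place; otherwise the data is first
 * copied into freshly allocated pages.
 */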
3225 static int
3226 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3227                      struct cifsFileInfo *open_file,
3228                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3229                      struct cifs_aio_ctx *ctx)
3230 {
3231         int rc = 0;
3232         size_t cur_len;
3233         unsigned long nr_pages, num_pages, i;
3234         struct cifs_writedata *wdata;
3235         struct iov_iter saved_from = *from;
3236         loff_t saved_offset = offset;
3237         pid_t pid;
3238         struct TCP_Server_Info *server;
3239         struct page **pagevec;
3240         size_t start;
3241         unsigned int xid;
3242
3243         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3244                 pid = open_file->pid;
3245         else
3246                 pid = current->tgid;
3247
3248         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3249         xid = get_xid();
3250
3251         do {
3252                 unsigned int wsize;
3253                 struct cifs_credits credits_on_stack;
3254                 struct cifs_credits *credits = &credits_on_stack;
3255
3256                 if (open_file->invalidHandle) {
3257                         rc = cifs_reopen_file(open_file, false);
3258                         if (rc == -EAGAIN)
3259                                 continue;
3260                         else if (rc)
3261                                 break;
3262                 }
3263
3264                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3265                                                    &wsize, credits);
3266                 if (rc)
3267                         break;
3268
3269                 cur_len = min_t(const size_t, len, wsize);
3270
3271                 if (ctx->direct_io) {
3272                         ssize_t result;
3273
3274                         result = iov_iter_get_pages_alloc2(
3275                                 from, &pagevec, cur_len, &start);
3276                         if (result < 0) {
3277                                 cifs_dbg(VFS,
3278                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3279                                          result, iov_iter_type(from),
3280                                          from->iov_offset, from->count);
3281                                 dump_stack();
3282
3283                                 rc = result;
3284                                 add_credits_and_wake_if(server, credits, 0);
3285                                 break;
3286                         }
3287                         cur_len = (size_t)result;
3288
3289                         nr_pages =
3290                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3291
3292                         wdata = cifs_writedata_direct_alloc(pagevec,
3293                                              cifs_uncached_writev_complete);
3294                         if (!wdata) {
3295                                 rc = -ENOMEM;
3296                                 add_credits_and_wake_if(server, credits, 0);
3297                                 break;
3298                         }
3299
3301                         wdata->page_offset = start;
3302                         wdata->tailsz =
3303                                 nr_pages > 1 ?
3304                                         cur_len - (PAGE_SIZE - start) -
3305                                         (nr_pages - 2) * PAGE_SIZE :
3306                                         cur_len;
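                        /*
                         * Illustrative example (not taken from the code):
                         * with start = 1KiB into the first page,
                         * cur_len = 10KiB and 4KiB pages, nr_pages is 3
                         * and tailsz is 10K - (4K - 1K) - 1 * 4K = 3KiB.
                         */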
3307                 } else {
3308                         nr_pages = get_numpages(wsize, len, &cur_len);
3309                         wdata = cifs_writedata_alloc(nr_pages,
3310                                              cifs_uncached_writev_complete);
3311                         if (!wdata) {
3312                                 rc = -ENOMEM;
3313                                 add_credits_and_wake_if(server, credits, 0);
3314                                 break;
3315                         }
3316
3317                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3318                         if (rc) {
3319                                 kvfree(wdata->pages);
3320                                 kfree(wdata);
3321                                 add_credits_and_wake_if(server, credits, 0);
3322                                 break;
3323                         }
3324
3325                         num_pages = nr_pages;
3326                         rc = wdata_fill_from_iovec(
3327                                 wdata, from, &cur_len, &num_pages);
3328                         if (rc) {
3329                                 for (i = 0; i < nr_pages; i++)
3330                                         put_page(wdata->pages[i]);
3331                                 kvfree(wdata->pages);
3332                                 kfree(wdata);
3333                                 add_credits_and_wake_if(server, credits, 0);
3334                                 break;
3335                         }
3336
3337                         /*
3338                          * Bring nr_pages down to the number of pages we
3339                          * actually used, and free any pages that we didn't use.
3340                          */
3341                         for ( ; nr_pages > num_pages; nr_pages--)
3342                                 put_page(wdata->pages[nr_pages - 1]);
3343
3344                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3345                 }
3346
3347                 wdata->sync_mode = WB_SYNC_ALL;
3348                 wdata->nr_pages = nr_pages;
3349                 wdata->offset = (__u64)offset;
3350                 wdata->cfile = cifsFileInfo_get(open_file);
3351                 wdata->server = server;
3352                 wdata->pid = pid;
3353                 wdata->bytes = cur_len;
3354                 wdata->pagesz = PAGE_SIZE;
3355                 wdata->credits = credits_on_stack;
3356                 wdata->ctx = ctx;
3357                 kref_get(&ctx->refcount);
3358
3359                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3360
3361                 if (!rc) {
3362                         if (wdata->cfile->invalidHandle)
3363                                 rc = -EAGAIN;
3364                         else
3365                                 rc = server->ops->async_writev(wdata,
3366                                         cifs_uncached_writedata_release);
3367                 }
3368
3369                 if (rc) {
3370                         add_credits_and_wake_if(server, &wdata->credits, 0);
3371                         kref_put(&wdata->refcount,
3372                                  cifs_uncached_writedata_release);
3373                         if (rc == -EAGAIN) {
3374                                 *from = saved_from;
3375                                 iov_iter_advance(from, offset - saved_offset);
3376                                 continue;
3377                         }
3378                         break;
3379                 }
3380
3381                 list_add_tail(&wdata->list, wdata_list);
3382                 offset += cur_len;
3383                 len -= cur_len;
3384         } while (len > 0);
3385
3386         free_xid(xid);
3387         return rc;
3388 }
3389
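/*
 * Reap completed uncached writes for an aio context: tally the bytes
 * written, resend anything that failed with -EAGAIN, and complete the
 * iocb (or wake the synchronous waiter) once every wdata is accounted
 * for.
 */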
3390 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3391 {
3392         struct cifs_writedata *wdata, *tmp;
3393         struct cifs_tcon *tcon;
3394         struct cifs_sb_info *cifs_sb;
3395         struct dentry *dentry = ctx->cfile->dentry;
3396         ssize_t rc;
3397
3398         tcon = tlink_tcon(ctx->cfile->tlink);
3399         cifs_sb = CIFS_SB(dentry->d_sb);
3400
3401         mutex_lock(&ctx->aio_mutex);
3402
3403         if (list_empty(&ctx->list)) {
3404                 mutex_unlock(&ctx->aio_mutex);
3405                 return;
3406         }
3407
3408         rc = ctx->rc;
3409         /*
3410          * Wait for and collect replies for any successful sends in order of
3411          * increasing offset. Once an error is hit, then return without waiting
3412          * for any more replies.
3413          */
3414 restart_loop:
3415         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3416                 if (!rc) {
3417                         if (!try_wait_for_completion(&wdata->done)) {
3418                                 mutex_unlock(&ctx->aio_mutex);
3419                                 return;
3420                         }
3421
3422                         if (wdata->result)
3423                                 rc = wdata->result;
3424                         else
3425                                 ctx->total_len += wdata->bytes;
3426
3427                         /* resend call if it's a retryable error */
3428                         if (rc == -EAGAIN) {
3429                                 struct list_head tmp_list;
3430                                 struct iov_iter tmp_from = ctx->iter;
3431
3432                                 INIT_LIST_HEAD(&tmp_list);
3433                                 list_del_init(&wdata->list);
3434
3435                                 if (ctx->direct_io)
3436                                         rc = cifs_resend_wdata(
3437                                                 wdata, &tmp_list, ctx);
3438                                 else {
3439                                         iov_iter_advance(&tmp_from,
3440                                                  wdata->offset - ctx->pos);
3441
3442                                         rc = cifs_write_from_iter(wdata->offset,
3443                                                 wdata->bytes, &tmp_from,
3444                                                 ctx->cfile, cifs_sb, &tmp_list,
3445                                                 ctx);
3446
3447                                         kref_put(&wdata->refcount,
3448                                                 cifs_uncached_writedata_release);
3449                                 }
3450
3451                                 list_splice(&tmp_list, &ctx->list);
3452                                 goto restart_loop;
3453                         }
3454                 }
3455                 list_del_init(&wdata->list);
3456                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3457         }
3458
3459         cifs_stats_bytes_written(tcon, ctx->total_len);
3460         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3461
3462         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3463
3464         mutex_unlock(&ctx->aio_mutex);
3465
3466         if (ctx->iocb && ctx->iocb->ki_complete)
3467                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3468         else
3469                 complete(&ctx->done);
3470 }
3471
3472 static ssize_t __cifs_writev(
3473         struct kiocb *iocb, struct iov_iter *from, bool direct)
3474 {
3475         struct file *file = iocb->ki_filp;
3476         ssize_t total_written = 0;
3477         struct cifsFileInfo *cfile;
3478         struct cifs_tcon *tcon;
3479         struct cifs_sb_info *cifs_sb;
3480         struct cifs_aio_ctx *ctx;
3481         struct iov_iter saved_from = *from;
3482         size_t len = iov_iter_count(from);
3483         int rc;
3484
3485         /*
3486          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3487          * In this case, fall back to the non-direct write function.
3488          * This could be improved by getting pages directly in ITER_KVEC.
3489          */
3490         if (direct && iov_iter_is_kvec(from)) {
3491                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3492                 direct = false;
3493         }
3494
3495         rc = generic_write_checks(iocb, from);
3496         if (rc <= 0)
3497                 return rc;
3498
3499         cifs_sb = CIFS_FILE_SB(file);
3500         cfile = file->private_data;
3501         tcon = tlink_tcon(cfile->tlink);
3502
3503         if (!tcon->ses->server->ops->async_writev)
3504                 return -ENOSYS;
3505
3506         ctx = cifs_aio_ctx_alloc();
3507         if (!ctx)
3508                 return -ENOMEM;
3509
3510         ctx->cfile = cifsFileInfo_get(cfile);
3511
3512         if (!is_sync_kiocb(iocb))
3513                 ctx->iocb = iocb;
3514
3515         ctx->pos = iocb->ki_pos;
3516
3517         if (direct) {
3518                 ctx->direct_io = true;
3519                 ctx->iter = *from;
3520                 ctx->len = len;
3521         } else {
3522                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3523                 if (rc) {
3524                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3525                         return rc;
3526                 }
3527         }
3528
3529         /* grab the lock here since response handlers can access ctx */
3530         mutex_lock(&ctx->aio_mutex);
3531
3532         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3533                                   cfile, cifs_sb, &ctx->list, ctx);
3534
3535         /*
3536          * If at least one write was successfully sent, then discard any rc
3537          * value from the later writes. If another write succeeds, then
3538          * we'll end up returning whatever was written. If it fails, then
3539          * we'll get a new rc value from that.
3540          */
3541         if (!list_empty(&ctx->list))
3542                 rc = 0;
3543
3544         mutex_unlock(&ctx->aio_mutex);
3545
3546         if (rc) {
3547                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3548                 return rc;
3549         }
3550
3551         if (!is_sync_kiocb(iocb)) {
3552                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3553                 return -EIOCBQUEUED;
3554         }
3555
3556         rc = wait_for_completion_killable(&ctx->done);
3557         if (rc) {
3558                 mutex_lock(&ctx->aio_mutex);
3559                 ctx->rc = rc = -EINTR;
3560                 total_written = ctx->total_len;
3561                 mutex_unlock(&ctx->aio_mutex);
3562         } else {
3563                 rc = ctx->rc;
3564                 total_written = ctx->total_len;
3565         }
3566
3567         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3568
3569         if (unlikely(!total_written))
3570                 return rc;
3571
3572         iocb->ki_pos += total_written;
3573         return total_written;
3574 }
3575
3576 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3577 {
3578         struct file *file = iocb->ki_filp;
3579
3580         cifs_revalidate_mapping(file->f_inode);
3581         return __cifs_writev(iocb, from, true);
3582 }
3583
3584 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3585 {
3586         return __cifs_writev(iocb, from, false);
3587 }
3588
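/*
 * Cached write used when a write oplock/lease is held but mandatory
 * byte-range locks may exist: take lock_sem and fail the write with
 * -EACCES if it would collide with an exclusive brlock.
 */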
3589 static ssize_t
3590 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3591 {
3592         struct file *file = iocb->ki_filp;
3593         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3594         struct inode *inode = file->f_mapping->host;
3595         struct cifsInodeInfo *cinode = CIFS_I(inode);
3596         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3597         ssize_t rc;
3598
3599         inode_lock(inode);
3600         /*
3601          * We need to hold the sem to be sure nobody modifies the lock list
3602          * with a brlock that prevents writing.
3603          */
3604         down_read(&cinode->lock_sem);
3605
3606         rc = generic_write_checks(iocb, from);
3607         if (rc <= 0)
3608                 goto out;
3609
3610         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3611                                      server->vals->exclusive_lock_type, 0,
3612                                      NULL, CIFS_WRITE_OP))
3613                 rc = __generic_file_write_iter(iocb, from);
3614         else
3615                 rc = -EACCES;
3616 out:
3617         up_read(&cinode->lock_sem);
3618         inode_unlock(inode);
3619
3620         if (rc > 0)
3621                 rc = generic_write_sync(iocb, rc);
3622         return rc;
3623 }
3624
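/*
 * write_iter entry point for strict cache mode: serialize against
 * oplock breaks via cifs_get_writer(), use the page cache only while
 * a write lease is held, and otherwise write through to the server,
 * zapping any read-cached data the write has just made stale.
 */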
3625 ssize_t
3626 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3627 {
3628         struct inode *inode = file_inode(iocb->ki_filp);
3629         struct cifsInodeInfo *cinode = CIFS_I(inode);
3630         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3631         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3632                                                 iocb->ki_filp->private_data;
3633         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3634         ssize_t written;
3635
3636         written = cifs_get_writer(cinode);
3637         if (written)
3638                 return written;
3639
3640         if (CIFS_CACHE_WRITE(cinode)) {
3641                 if (cap_unix(tcon->ses) &&
3642                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3643                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3644                         written = generic_file_write_iter(iocb, from);
3645                         goto out;
3646                 }
3647                 written = cifs_writev(iocb, from);
3648                 goto out;
3649         }
3650         /*
3651          * For non-oplocked files in strict cache mode we need to write the data
3652          * to the server exactly from the pos to pos+len-1 rather than flush all
3653          * affected pages because it may cause an error with mandatory locks on
3654          * these pages but not on the region from pos to pos+len-1.
3655          */
3656         written = cifs_user_writev(iocb, from);
3657         if (CIFS_CACHE_READ(cinode)) {
3658                 /*
3659                  * We have read level caching and we have just sent a write
3660                  * request to the server thus making data in the cache stale.
3661                  * Zap the cache and set oplock/lease level to NONE to avoid
3662                  * reading stale data from the cache. All subsequent read
3663                  * operations will read new data from the server.
3664                  */
3665                 cifs_zap_mapping(inode);
3666                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3667                          inode);
3668                 cinode->oplock = 0;
3669         }
3670 out:
3671         cifs_put_writer(cinode);
3672         return written;
3673 }
3674
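/*
 * Allocate a cifs_readdata, either wrapping a caller-supplied page
 * array (the direct variant) or a freshly kcalloc'ed one, and wire up
 * the refcount, completion and work item used by the async read path.
 */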
3675 static struct cifs_readdata *
3676 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3677 {
3678         struct cifs_readdata *rdata;
3679
3680         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3681         if (rdata != NULL) {
3682                 rdata->pages = pages;
3683                 kref_init(&rdata->refcount);
3684                 INIT_LIST_HEAD(&rdata->list);
3685                 init_completion(&rdata->done);
3686                 INIT_WORK(&rdata->work, complete);
3687         }
3688
3689         return rdata;
3690 }
3691
3692 static struct cifs_readdata *
3693 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3694 {
3695         struct page **pages =
3696                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3697         struct cifs_readdata *ret = NULL;
3698
3699         if (pages) {
3700                 ret = cifs_readdata_direct_alloc(pages, complete);
3701                 if (!ret)
3702                         kfree(pages);
3703         }
3704
3705         return ret;
3706 }
3707
3708 void
3709 cifs_readdata_release(struct kref *refcount)
3710 {
3711         struct cifs_readdata *rdata = container_of(refcount,
3712                                         struct cifs_readdata, refcount);
3713 #ifdef CONFIG_CIFS_SMB_DIRECT
3714         if (rdata->mr) {
3715                 smbd_deregister_mr(rdata->mr);
3716                 rdata->mr = NULL;
3717         }
3718 #endif
3719         if (rdata->cfile)
3720                 cifsFileInfo_put(rdata->cfile);
3721
3722         kvfree(rdata->pages);
3723         kfree(rdata);
3724 }
3725
3726 static int
3727 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3728 {
3729         int rc = 0;
3730         struct page *page;
3731         unsigned int i;
3732
3733         for (i = 0; i < nr_pages; i++) {
3734                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3735                 if (!page) {
3736                         rc = -ENOMEM;
3737                         break;
3738                 }
3739                 rdata->pages[i] = page;
3740         }
3741
3742         if (rc) {
3743                 unsigned int nr_page_failed = i;
3744
3745                 for (i = 0; i < nr_page_failed; i++) {
3746                         put_page(rdata->pages[i]);
3747                         rdata->pages[i] = NULL;
3748                 }
3749         }
3750         return rc;
3751 }
3752
3753 static void
3754 cifs_uncached_readdata_release(struct kref *refcount)
3755 {
3756         struct cifs_readdata *rdata = container_of(refcount,
3757                                         struct cifs_readdata, refcount);
3758         unsigned int i;
3759
3760         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3761         for (i = 0; i < rdata->nr_pages; i++) {
3762                 put_page(rdata->pages[i]);
3763         }
3764         cifs_readdata_release(refcount);
3765 }
3766
3767 /**
3768  * cifs_readdata_to_iov - copy data from response pages into an iovec
3769  * @rdata:      the readdata response with list of pages holding data
3770  * @iter:       destination for our data
3771  *
3772  * This function copies data from a list of pages in a readdata response into
3773  * an array of iovecs. It will first calculate where the data should go
3774  * based on the info in the readdata and then copy the data into that spot.
3775  */
3776 static int
3777 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3778 {
3779         size_t remaining = rdata->got_bytes;
3780         unsigned int i;
3781
3782         for (i = 0; i < rdata->nr_pages; i++) {
3783                 struct page *page = rdata->pages[i];
3784                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3785                 size_t written;
3786
3787                 if (unlikely(iov_iter_is_pipe(iter))) {
3788                         void *addr = kmap_atomic(page);
3789
3790                         written = copy_to_iter(addr, copy, iter);
3791                         kunmap_atomic(addr);
3792                 } else
3793                         written = copy_page_to_iter(page, 0, copy, iter);
3794                 remaining -= written;
3795                 if (written < copy && iov_iter_count(iter) > 0)
3796                         break;
3797         }
3798         return remaining ? -EFAULT : 0;
3799 }
3800
3801 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3802
3803 static void
3804 cifs_uncached_readv_complete(struct work_struct *work)
3805 {
3806         struct cifs_readdata *rdata = container_of(work,
3807                                                 struct cifs_readdata, work);
3808
3809         complete(&rdata->done);
3810         collect_uncached_read_data(rdata->ctx);
3811         /* the call below may free the last ref to the aio ctx */
3812         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3813 }
3814
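/*
 * Fill the rdata's pages with up to @len bytes, copied from @iter when
 * the data has already been received into a buffer (e.g. decrypted
 * responses) or read straight from the socket otherwise; pages beyond
 * the received length are released and the tail size recorded.
 */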
3815 static int
3816 uncached_fill_pages(struct TCP_Server_Info *server,
3817                     struct cifs_readdata *rdata, struct iov_iter *iter,
3818                     unsigned int len)
3819 {
3820         int result = 0;
3821         unsigned int i;
3822         unsigned int nr_pages = rdata->nr_pages;
3823         unsigned int page_offset = rdata->page_offset;
3824
3825         rdata->got_bytes = 0;
3826         rdata->tailsz = PAGE_SIZE;
3827         for (i = 0; i < nr_pages; i++) {
3828                 struct page *page = rdata->pages[i];
3829                 size_t n;
3830                 unsigned int segment_size = rdata->pagesz;
3831
3832                 if (i == 0)
3833                         segment_size -= page_offset;
3834                 else
3835                         page_offset = 0;
3836
3838                 if (len <= 0) {
3839                         /* no need to hold page hostage */
3840                         rdata->pages[i] = NULL;
3841                         rdata->nr_pages--;
3842                         put_page(page);
3843                         continue;
3844                 }
3845
3846                 n = len;
3847                 if (len >= segment_size)
3848                         /* enough data to fill the page */
3849                         n = segment_size;
3850                 else
3851                         rdata->tailsz = len;
3852                 len -= n;
3853
3854                 if (iter)
3855                         result = copy_page_from_iter(
3856                                         page, page_offset, n, iter);
3857 #ifdef CONFIG_CIFS_SMB_DIRECT
3858                 else if (rdata->mr)
3859                         result = n;
3860 #endif
3861                 else
3862                         result = cifs_read_page_from_socket(
3863                                         server, page, page_offset, n);
3864                 if (result < 0)
3865                         break;
3866
3867                 rdata->got_bytes += result;
3868         }
3869
3870         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3871                                                 rdata->got_bytes : result;
3872 }
3873
3874 static int
3875 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3876                               struct cifs_readdata *rdata, unsigned int len)
3877 {
3878         return uncached_fill_pages(server, rdata, NULL, len);
3879 }
3880
3881 static int
3882 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3883                               struct cifs_readdata *rdata,
3884                               struct iov_iter *iter)
3885 {
3886         return uncached_fill_pages(server, rdata, iter, iter->count);
3887 }
3888
3889 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3890                         struct list_head *rdata_list,
3891                         struct cifs_aio_ctx *ctx)
3892 {
3893         unsigned int rsize;
3894         struct cifs_credits credits;
3895         int rc;
3896         struct TCP_Server_Info *server;
3897
3898         /* XXX: should we pick a new channel here? */
3899         server = rdata->server;
3900
3901         do {
3902                 if (rdata->cfile->invalidHandle) {
3903                         rc = cifs_reopen_file(rdata->cfile, true);
3904                         if (rc == -EAGAIN)
3905                                 continue;
3906                         else if (rc)
3907                                 break;
3908                 }
3909
3910                 /*
3911                  * Wait for credits to resend this rdata.
3912                  * Note: we are attempting to resend the whole rdata rather
3913                  * than in segments
3914                  */
3915                 do {
3916                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3917                                                 &rsize, &credits);
3918
3919                         if (rc)
3920                                 goto fail;
3921
3922                         if (rsize < rdata->bytes) {
3923                                 add_credits_and_wake_if(server, &credits, 0);
3924                                 msleep(1000);
3925                         }
3926                 } while (rsize < rdata->bytes);
3927                 rdata->credits = credits;
3928
3929                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3930                 if (!rc) {
3931                         if (rdata->cfile->invalidHandle)
3932                                 rc = -EAGAIN;
3933                         else {
3934 #ifdef CONFIG_CIFS_SMB_DIRECT
3935                                 if (rdata->mr) {
3936                                         rdata->mr->need_invalidate = true;
3937                                         smbd_deregister_mr(rdata->mr);
3938                                         rdata->mr = NULL;
3939                                 }
3940 #endif
3941                                 rc = server->ops->async_readv(rdata);
3942                         }
3943                 }
3944
3945                 /* If the read was successfully sent, we are done */
3946                 if (!rc) {
3947                         /* Add to aio pending list */
3948                         list_add_tail(&rdata->list, rdata_list);
3949                         return 0;
3950                 }
3951
3952                 /* Roll back credits and retry if needed */
3953                 add_credits_and_wake_if(server, &rdata->credits, 0);
3954         } while (rc == -EAGAIN);
3955
3956 fail:
3957         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3958         return rc;
3959 }
3960
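/*
 * Read-side counterpart of cifs_write_from_iter(): carve @len bytes at
 * @offset into rsize-sized cifs_readdata requests, pinning user pages
 * for direct I/O or allocating bounce pages otherwise, and submit them
 * via ->async_readv().
 */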
3961 static int
3962 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3963                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3964                      struct cifs_aio_ctx *ctx)
3965 {
3966         struct cifs_readdata *rdata;
3967         unsigned int npages, rsize;
3968         struct cifs_credits credits_on_stack;
3969         struct cifs_credits *credits = &credits_on_stack;
3970         size_t cur_len;
3971         int rc;
3972         pid_t pid;
3973         struct TCP_Server_Info *server;
3974         struct page **pagevec;
3975         size_t start;
3976         struct iov_iter direct_iov = ctx->iter;
3977
3978         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3979
3980         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3981                 pid = open_file->pid;
3982         else
3983                 pid = current->tgid;
3984
3985         if (ctx->direct_io)
3986                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3987
3988         do {
3989                 if (open_file->invalidHandle) {
3990                         rc = cifs_reopen_file(open_file, true);
3991                         if (rc == -EAGAIN)
3992                                 continue;
3993                         else if (rc)
3994                                 break;
3995                 }
3996
3997                 if (cifs_sb->ctx->rsize == 0)
3998                         cifs_sb->ctx->rsize =
3999                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4000                                                              cifs_sb->ctx);
4001
4002                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4003                                                    &rsize, credits);
4004                 if (rc)
4005                         break;
4006
4007                 cur_len = min_t(const size_t, len, rsize);
4008
4009                 if (ctx->direct_io) {
4010                         ssize_t result;
4011
4012                         result = iov_iter_get_pages_alloc2(
4013                                         &direct_iov, &pagevec,
4014                                         cur_len, &start);
4015                         if (result < 0) {
4016                                 cifs_dbg(VFS,
4017                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4018                                          result, iov_iter_type(&direct_iov),
4019                                          direct_iov.iov_offset,
4020                                          direct_iov.count);
4021                                 dump_stack();
4022
4023                                 rc = result;
4024                                 add_credits_and_wake_if(server, credits, 0);
4025                                 break;
4026                         }
4027                         cur_len = (size_t)result;
4028
4029                         rdata = cifs_readdata_direct_alloc(
4030                                         pagevec, cifs_uncached_readv_complete);
4031                         if (!rdata) {
4032                                 add_credits_and_wake_if(server, credits, 0);
4033                                 rc = -ENOMEM;
4034                                 break;
4035                         }
4036
4037                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4038                         rdata->page_offset = start;
4039                         rdata->tailsz = npages > 1 ?
4040                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4041                                 cur_len;
4042
4043                 } else {
4044
4045                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4046                         /* allocate a readdata struct */
4047                         rdata = cifs_readdata_alloc(npages,
4048                                             cifs_uncached_readv_complete);
4049                         if (!rdata) {
4050                                 add_credits_and_wake_if(server, credits, 0);
4051                                 rc = -ENOMEM;
4052                                 break;
4053                         }
4054
4055                         rc = cifs_read_allocate_pages(rdata, npages);
4056                         if (rc) {
4057                                 kvfree(rdata->pages);
4058                                 kfree(rdata);
4059                                 add_credits_and_wake_if(server, credits, 0);
4060                                 break;
4061                         }
4062
4063                         rdata->tailsz = PAGE_SIZE;
4064                 }
4065
4066                 rdata->server = server;
4067                 rdata->cfile = cifsFileInfo_get(open_file);
4068                 rdata->nr_pages = npages;
4069                 rdata->offset = offset;
4070                 rdata->bytes = cur_len;
4071                 rdata->pid = pid;
4072                 rdata->pagesz = PAGE_SIZE;
4073                 rdata->read_into_pages = cifs_uncached_read_into_pages;
4074                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4075                 rdata->credits = credits_on_stack;
4076                 rdata->ctx = ctx;
4077                 kref_get(&ctx->refcount);
4078
4079                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4080
4081                 if (!rc) {
4082                         if (rdata->cfile->invalidHandle)
4083                                 rc = -EAGAIN;
4084                         else
4085                                 rc = server->ops->async_readv(rdata);
4086                 }
4087
4088                 if (rc) {
4089                         add_credits_and_wake_if(server, &rdata->credits, 0);
4090                         kref_put(&rdata->refcount,
4091                                 cifs_uncached_readdata_release);
4092                         if (rc == -EAGAIN) {
4093                                 iov_iter_revert(&direct_iov, cur_len);
4094                                 continue;
4095                         }
4096                         break;
4097                 }
4098
4099                 list_add_tail(&rdata->list, rdata_list);
4100                 offset += cur_len;
4101                 len -= cur_len;
4102         } while (len > 0);
4103
4104         return rc;
4105 }
4106
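/*
 * Reap completed uncached reads: copy received data into the user
 * iovec (unless direct I/O already placed it there), resend -EAGAIN
 * failures, then complete the iocb or wake the synchronous waiter.
 */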
4107 static void
4108 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4109 {
4110         struct cifs_readdata *rdata, *tmp;
4111         struct iov_iter *to = &ctx->iter;
4112         struct cifs_sb_info *cifs_sb;
4113         int rc;
4114
4115         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4116
4117         mutex_lock(&ctx->aio_mutex);
4118
4119         if (list_empty(&ctx->list)) {
4120                 mutex_unlock(&ctx->aio_mutex);
4121                 return;
4122         }
4123
4124         rc = ctx->rc;
4125         /* the loop below should proceed in the order of increasing offsets */
4126 again:
4127         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4128                 if (!rc) {
4129                         if (!try_wait_for_completion(&rdata->done)) {
4130                                 mutex_unlock(&ctx->aio_mutex);
4131                                 return;
4132                         }
4133
4134                         if (rdata->result == -EAGAIN) {
4135                                 /* resend call if it's a retryable error */
4136                                 struct list_head tmp_list;
4137                                 unsigned int got_bytes = rdata->got_bytes;
4138
4139                                 list_del_init(&rdata->list);
4140                                 INIT_LIST_HEAD(&tmp_list);
4141
4142                                 /*
4143                                  * We got part of the data and then a reconnect
4144                                  * happened -- fill the buffer and continue
4145                                  * reading.
4146                                  */
4147                                 if (got_bytes && got_bytes < rdata->bytes) {
4148                                         rc = 0;
4149                                         if (!ctx->direct_io)
4150                                                 rc = cifs_readdata_to_iov(rdata, to);
4151                                         if (rc) {
4152                                                 kref_put(&rdata->refcount,
4153                                                         cifs_uncached_readdata_release);
4154                                                 continue;
4155                                         }
4156                                 }
4157
4158                                 if (ctx->direct_io) {
4159                                         /*
4160                                          * Re-use rdata as this is
4161                                          * direct I/O
4162                                          */
4163                                         rc = cifs_resend_rdata(
4164                                                 rdata,
4165                                                 &tmp_list, ctx);
4166                                 } else {
4167                                         rc = cifs_send_async_read(
4168                                                 rdata->offset + got_bytes,
4169                                                 rdata->bytes - got_bytes,
4170                                                 rdata->cfile, cifs_sb,
4171                                                 &tmp_list, ctx);
4172
4173                                         kref_put(&rdata->refcount,
4174                                                 cifs_uncached_readdata_release);
4175                                 }
4176
4177                                 list_splice(&tmp_list, &ctx->list);
4178
4179                                 goto again;
4180                         } else if (rdata->result)
4181                                 rc = rdata->result;
4182                         else if (!ctx->direct_io)
4183                                 rc = cifs_readdata_to_iov(rdata, to);
4184
4185                         /* if there was a short read -- discard anything left */
4186                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4187                                 rc = -ENODATA;
4188
4189                         ctx->total_len += rdata->got_bytes;
4190                 }
4191                 list_del_init(&rdata->list);
4192                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4193         }
4194
4195         if (!ctx->direct_io)
4196                 ctx->total_len = ctx->len - iov_iter_count(to);
4197
4198         /* mask nodata case */
4199         if (rc == -ENODATA)
4200                 rc = 0;
4201
4202         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4203
4204         mutex_unlock(&ctx->aio_mutex);
4205
4206         if (ctx->iocb && ctx->iocb->ki_complete)
4207                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4208         else
4209                 complete(&ctx->done);
4210 }
4211
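/*
 * Common back end for cifs_user_readv() and cifs_direct_readv().  Set up
 * a cifs_aio_ctx, split the request into async reads with
 * cifs_send_async_read(), then either return -EIOCBQUEUED for an async
 * iocb or wait (killably) for collect_uncached_read_data() to finish.
 */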
4212 static ssize_t __cifs_readv(
4213         struct kiocb *iocb, struct iov_iter *to, bool direct)
4214 {
4215         size_t len;
4216         struct file *file = iocb->ki_filp;
4217         struct cifs_sb_info *cifs_sb;
4218         struct cifsFileInfo *cfile;
4219         struct cifs_tcon *tcon;
4220         ssize_t rc, total_read = 0;
4221         loff_t offset = iocb->ki_pos;
4222         struct cifs_aio_ctx *ctx;
4223
4224         /*
4225          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
4226          * fall back to the data copy read path.  This could be improved
4227          * by getting the pages directly in ITER_KVEC mode.
4228          */
4229         if (direct && iov_iter_is_kvec(to)) {
4230                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4231                 direct = false;
4232         }
4233
4234         len = iov_iter_count(to);
4235         if (!len)
4236                 return 0;
4237
4238         cifs_sb = CIFS_FILE_SB(file);
4239         cfile = file->private_data;
4240         tcon = tlink_tcon(cfile->tlink);
4241
4242         if (!tcon->ses->server->ops->async_readv)
4243                 return -ENOSYS;
4244
4245         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4246                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4247
4248         ctx = cifs_aio_ctx_alloc();
4249         if (!ctx)
4250                 return -ENOMEM;
4251
4252         ctx->cfile = cifsFileInfo_get(cfile);
4253
4254         if (!is_sync_kiocb(iocb))
4255                 ctx->iocb = iocb;
4256
4257         if (user_backed_iter(to))
4258                 ctx->should_dirty = true;
4259
4260         if (direct) {
4261                 ctx->pos = offset;
4262                 ctx->direct_io = true;
4263                 ctx->iter = *to;
4264                 ctx->len = len;
4265         } else {
4266                 rc = setup_aio_ctx_iter(ctx, to, READ);
4267                 if (rc) {
4268                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4269                         return rc;
4270                 }
4271                 len = ctx->len;
4272         }
4273
4274         /* grab a lock here because the read response handlers can access ctx */
4275         mutex_lock(&ctx->aio_mutex);
4276
4277         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4278
4279         /* if sending at least one read request succeeded, then reset rc */
4280         if (!list_empty(&ctx->list))
4281                 rc = 0;
4282
4283         mutex_unlock(&ctx->aio_mutex);
4284
4285         if (rc) {
4286                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4287                 return rc;
4288         }
4289
4290         if (!is_sync_kiocb(iocb)) {
4291                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4292                 return -EIOCBQUEUED;
4293         }
4294
4295         rc = wait_for_completion_killable(&ctx->done);
4296         if (rc) {
4297                 mutex_lock(&ctx->aio_mutex);
4298                 ctx->rc = rc = -EINTR;
4299                 total_read = ctx->total_len;
4300                 mutex_unlock(&ctx->aio_mutex);
4301         } else {
4302                 rc = ctx->rc;
4303                 total_read = ctx->total_len;
4304         }
4305
4306         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4307
4308         if (total_read) {
4309                 iocb->ki_pos += total_read;
4310                 return total_read;
4311         }
4312         return rc;
4313 }
4314
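/*
 * Entry points for the uncached read paths: cifs_direct_readv() places
 * data straight into the caller's buffer, while cifs_user_readv() reads
 * into intermediate pages and copies the result out via
 * cifs_readdata_to_iov().
 */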
4315 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4316 {
4317         return __cifs_readv(iocb, to, true);
4318 }
4319
4320 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4321 {
4322         return __cifs_readv(iocb, to, false);
4323 }
4324
4325 ssize_t
4326 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4327 {
4328         struct inode *inode = file_inode(iocb->ki_filp);
4329         struct cifsInodeInfo *cinode = CIFS_I(inode);
4330         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4331         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4332                                                 iocb->ki_filp->private_data;
4333         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4334         int rc = -EACCES;
4335
4336         /*
4337          * In strict cache mode we need to read from the server all the time
4338          * if we don't have level II oplock because the server can delay mtime
4339          * change - so we can't make a decision about invalidating the inode.
4340          * Reading from the page cache can also fail if there are mandatory
4341          * byte-range locks on pages affected by this read but not on the
4342          * region from pos to pos+len-1.
4343          */
4344         if (!CIFS_CACHE_READ(cinode))
4345                 return cifs_user_readv(iocb, to);
4346
4347         if (cap_unix(tcon->ses) &&
4348             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4349             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4350                 return generic_file_read_iter(iocb, to);
4351
4352         /*
4353          * We need to hold lock_sem to be sure nobody modifies the lock list
4354          * with a brlock that prevents reading.
4355          */
4356         down_read(&cinode->lock_sem);
4357         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4358                                      tcon->ses->server->vals->shared_lock_type,
4359                                      0, NULL, CIFS_READ_OP))
4360                 rc = generic_file_read_iter(iocb, to);
4361         up_read(&cinode->lock_sem);
4362         return rc;
4363 }
4364
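/*
 * Synchronous read helper, used by cifs_readpage_worker() below.  Issues
 * ->sync_read calls of at most rsize bytes each, transparently reopening
 * an invalidated handle and retrying on -EAGAIN, and advances *offset by
 * the number of bytes read.
 */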
4365 static ssize_t
4366 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4367 {
4368         int rc = -EACCES;
4369         unsigned int bytes_read = 0;
4370         unsigned int total_read;
4371         unsigned int current_read_size;
4372         unsigned int rsize;
4373         struct cifs_sb_info *cifs_sb;
4374         struct cifs_tcon *tcon;
4375         struct TCP_Server_Info *server;
4376         unsigned int xid;
4377         char *cur_offset;
4378         struct cifsFileInfo *open_file;
4379         struct cifs_io_parms io_parms = {0};
4380         int buf_type = CIFS_NO_BUFFER;
4381         __u32 pid;
4382
4383         xid = get_xid();
4384         cifs_sb = CIFS_FILE_SB(file);
4385
4386         /* FIXME: set up handlers for larger reads and/or convert to async */
4387         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4388
4389         if (file->private_data == NULL) {
4390                 rc = -EBADF;
4391                 free_xid(xid);
4392                 return rc;
4393         }
4394         open_file = file->private_data;
4395         tcon = tlink_tcon(open_file->tlink);
4396         server = cifs_pick_channel(tcon->ses);
4397
4398         if (!server->ops->sync_read) {
4399                 free_xid(xid);
4400                 return -ENOSYS;
4401         }
4402
4403         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4404                 pid = open_file->pid;
4405         else
4406                 pid = current->tgid;
4407
4408         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4409                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4410
4411         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4412              total_read += bytes_read, cur_offset += bytes_read) {
4413                 do {
4414                         current_read_size = min_t(uint, read_size - total_read,
4415                                                   rsize);
4416                         /*
4417                          * For Windows ME and 9x we do not want to request more
4418                          * than the server negotiated, since it will refuse the
4419                          * read otherwise.
4420                          */
4421                         if (!(tcon->ses->capabilities &
4422                                 tcon->ses->server->vals->cap_large_files)) {
4423                                 current_read_size = min_t(uint,
4424                                         current_read_size, CIFSMaxBufSize);
4425                         }
4426                         if (open_file->invalidHandle) {
4427                                 rc = cifs_reopen_file(open_file, true);
4428                                 if (rc != 0)
4429                                         break;
4430                         }
4431                         io_parms.pid = pid;
4432                         io_parms.tcon = tcon;
4433                         io_parms.offset = *offset;
4434                         io_parms.length = current_read_size;
4435                         io_parms.server = server;
4436                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4437                                                     &bytes_read, &cur_offset,
4438                                                     &buf_type);
4439                 } while (rc == -EAGAIN);
4440
4441                 if (rc || (bytes_read == 0)) {
4442                         if (total_read) {
4443                                 break;
4444                         } else {
4445                                 free_xid(xid);
4446                                 return rc;
4447                         }
4448                 } else {
4449                         cifs_stats_bytes_read(tcon, total_read);
4450                         *offset += bytes_read;
4451                 }
4452         }
4453         free_xid(xid);
4454         return total_read;
4455 }
4456
4457 /*
4458  * If the page is mmap'ed into a process' page tables, then we need to make
4459  * sure that it doesn't change while being written back.
4460  */
4461 static vm_fault_t
4462 cifs_page_mkwrite(struct vm_fault *vmf)
4463 {
4464         struct page *page = vmf->page;
4465
4466         /*
4467          * Wait for the page to be written to the cache before we allow it to
4468          * be modified.  We then assume the entire page will need writing back.
4469          */
4469 #ifdef CONFIG_CIFS_FSCACHE
4470         if (PageFsCache(page) &&
4471             wait_on_page_fscache_killable(page) < 0)
4472                 return VM_FAULT_RETRY;
4473 #endif
4474
4475         wait_on_page_writeback(page);
4476
4477         if (lock_page_killable(page) < 0)
4478                 return VM_FAULT_RETRY;
4479         return VM_FAULT_LOCKED;
4480 }
4481
4482 static const struct vm_operations_struct cifs_file_vm_ops = {
4483         .fault = filemap_fault,
4484         .map_pages = filemap_map_pages,
4485         .page_mkwrite = cifs_page_mkwrite,
4486 };
4487
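/*
 * mmap for cache=strict mounts: if we do not hold a read oplock, zap the
 * local page cache first so that subsequent faults fetch fresh data from
 * the server.
 */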
4488 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4489 {
4490         int xid, rc = 0;
4491         struct inode *inode = file_inode(file);
4492
4493         xid = get_xid();
4494
4495         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4496                 rc = cifs_zap_mapping(inode);
4497         if (!rc)
4498                 rc = generic_file_mmap(file, vma);
4499         if (!rc)
4500                 vma->vm_ops = &cifs_file_vm_ops;
4501
4502         free_xid(xid);
4503         return rc;
4504 }
4505
4506 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4507 {
4508         int rc, xid;
4509
4510         xid = get_xid();
4511
4512         rc = cifs_revalidate_file(file);
4513         if (rc)
4514                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4515                          rc);
4516         if (!rc)
4517                 rc = generic_file_mmap(file, vma);
4518         if (!rc)
4519                 vma->vm_ops = &cifs_file_vm_ops;
4520
4521         free_xid(xid);
4522         return rc;
4523 }
4524
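/*
 * Completion work for async readahead reads: on success (or a partial
 * read that will be retried) mark the pages uptodate and hand them to
 * fscache, otherwise flag them as being in error; then unlock and release
 * every page and drop the rdata reference.
 */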
4525 static void
4526 cifs_readv_complete(struct work_struct *work)
4527 {
4528         unsigned int i, got_bytes;
4529         struct cifs_readdata *rdata = container_of(work,
4530                                                 struct cifs_readdata, work);
4531
4532         got_bytes = rdata->got_bytes;
4533         for (i = 0; i < rdata->nr_pages; i++) {
4534                 struct page *page = rdata->pages[i];
4535
4536                 if (rdata->result == 0 ||
4537                     (rdata->result == -EAGAIN && got_bytes)) {
4538                         flush_dcache_page(page);
4539                         SetPageUptodate(page);
4540                 } else
4541                         SetPageError(page);
4542
4543                 if (rdata->result == 0 ||
4544                     (rdata->result == -EAGAIN && got_bytes))
4545                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4546
4547                 unlock_page(page);
4548
4549                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4550
4551                 put_page(page);
4552                 rdata->pages[i] = NULL;
4553         }
4554         kref_put(&rdata->refcount, cifs_readdata_release);
4555 }
4556
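/*
 * Fill the pages attached to an rdata: from the supplied iov_iter when one
 * is given (the payload was already received and, e.g., decrypted
 * elsewhere), as a no-op for smbdirect (RDMA has already placed the data),
 * or straight from the socket.  A partial tail page is zero-filled past
 * the received data: e.g. for len = 5000 and PAGE_SIZE = 4096, page 0 is
 * filled completely and page 1 gets 904 bytes with the remaining 3192
 * zeroed (tailsz = 904).  Pages beyond the data are dropped from the
 * rdata, with pages past the server's EOF zero-filled and marked uptodate
 * first.
 */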
4557 static int
4558 readpages_fill_pages(struct TCP_Server_Info *server,
4559                      struct cifs_readdata *rdata, struct iov_iter *iter,
4560                      unsigned int len)
4561 {
4562         int result = 0;
4563         unsigned int i;
4564         u64 eof;
4565         pgoff_t eof_index;
4566         unsigned int nr_pages = rdata->nr_pages;
4567         unsigned int page_offset = rdata->page_offset;
4568
4569         /* determine the eof that the server (probably) has */
4570         eof = CIFS_I(rdata->mapping->host)->server_eof;
4571         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4572         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4573
4574         rdata->got_bytes = 0;
4575         rdata->tailsz = PAGE_SIZE;
4576         for (i = 0; i < nr_pages; i++) {
4577                 struct page *page = rdata->pages[i];
4578                 unsigned int to_read = rdata->pagesz;
4579                 size_t n;
4580
4581                 if (i == 0)
4582                         to_read -= page_offset;
4583                 else
4584                         page_offset = 0;
4585
4586                 n = to_read;
4587
4588                 if (len >= to_read) {
4589                         len -= to_read;
4590                 } else if (len > 0) {
4591                         /* enough for a partial page; fill it and zero the rest */
4592                         zero_user(page, len + page_offset, to_read - len);
4593                         n = rdata->tailsz = len;
4594                         len = 0;
4595                 } else if (page->index > eof_index) {
4596                         /*
4597                          * The VFS will not try to do readahead past the
4598                          * i_size, but it's possible that we have outstanding
4599                          * writes with gaps in the middle and the i_size hasn't
4600                          * caught up yet. Populate those with zeroed out pages
4601                          * to prevent the VFS from repeatedly attempting to
4602                          * fill them until the writes are flushed.
4603                          */
4604                         zero_user(page, 0, PAGE_SIZE);
4605                         flush_dcache_page(page);
4606                         SetPageUptodate(page);
4607                         unlock_page(page);
4608                         put_page(page);
4609                         rdata->pages[i] = NULL;
4610                         rdata->nr_pages--;
4611                         continue;
4612                 } else {
4613                         /* no need to hold page hostage */
4614                         unlock_page(page);
4615                         put_page(page);
4616                         rdata->pages[i] = NULL;
4617                         rdata->nr_pages--;
4618                         continue;
4619                 }
4620
4621                 if (iter)
4622                         result = copy_page_from_iter(
4623                                         page, page_offset, n, iter);
4624 #ifdef CONFIG_CIFS_SMB_DIRECT
4625                 else if (rdata->mr)
4626                         result = n;
4627 #endif
4628                 else
4629                         result = cifs_read_page_from_socket(
4630                                         server, page, page_offset, n);
4631                 if (result < 0)
4632                         break;
4633
4634                 rdata->got_bytes += result;
4635         }
4636
4637         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4638                                                 rdata->got_bytes : result;
4639 }
4640
4641 static int
4642 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4643                                struct cifs_readdata *rdata, unsigned int len)
4644 {
4645         return readpages_fill_pages(server, rdata, NULL, len);
4646 }
4647
4648 static int
4649 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4650                                struct cifs_readdata *rdata,
4651                                struct iov_iter *iter)
4652 {
4653         return readpages_fill_pages(server, rdata, iter, iter->count);
4654 }
4655
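/*
 * ->readahead for cifs: chop the readahead window into rsize-sized async
 * read requests, letting fscache satisfy any ranges it already has
 * cached.  Each batch borrows credits from the server and completes via
 * cifs_readv_complete() above.
 */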
4656 static void cifs_readahead(struct readahead_control *ractl)
4657 {
4658         int rc;
4659         struct cifsFileInfo *open_file = ractl->file->private_data;
4660         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4661         struct TCP_Server_Info *server;
4662         pid_t pid;
4663         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4664         pgoff_t next_cached = ULONG_MAX;
4665         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4666                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4667         bool check_cache = caching;
4668
4669         xid = get_xid();
4670
4671         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4672                 pid = open_file->pid;
4673         else
4674                 pid = current->tgid;
4675
4676         rc = 0;
4677         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4678
4679         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4680                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4681
4682         /*
4683          * Chop the readahead request up into rsize-sized read requests.
4684          */
4685         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4686                 unsigned int i, got, rsize;
4687                 struct page *page;
4688                 struct cifs_readdata *rdata;
4689                 struct cifs_credits credits_on_stack;
4690                 struct cifs_credits *credits = &credits_on_stack;
4691                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4692
4693                 /*
4694                  * Find out if we have anything cached in the range of
4695                  * interest, and if so, where the next chunk of cached data is.
4696                  */
4697                 if (caching) {
4698                         if (check_cache) {
4699                                 rc = cifs_fscache_query_occupancy(
4700                                         ractl->mapping->host, index, nr_pages,
4701                                         &next_cached, &cache_nr_pages);
4702                                 if (rc < 0)
4703                                         caching = false;
4704                                 check_cache = false;
4705                         }
4706
4707                         if (index == next_cached) {
4708                                 /*
4709                                  * TODO: Send a whole batch of pages to be read
4710                                  * by the cache.
4711                                  */
4712                                 struct folio *folio = readahead_folio(ractl);
4713
4714                                 last_batch_size = folio_nr_pages(folio);
4715                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4716                                                                &folio->page) < 0) {
4717                                         /*
4718                                          * TODO: Deal with cache read failure
4719                                          * here, but for the moment, delegate
4720                                          * that to readpage.
4721                                          */
4722                                         caching = false;
4723                                 }
4724                                 folio_unlock(folio);
4725                                 next_cached++;
4726                                 cache_nr_pages--;
4727                                 if (cache_nr_pages == 0)
4728                                         check_cache = true;
4729                                 continue;
4730                         }
4731                 }
4732
4733                 if (open_file->invalidHandle) {
4734                         rc = cifs_reopen_file(open_file, true);
4735                         if (rc) {
4736                                 if (rc == -EAGAIN)
4737                                         continue;
4738                                 break;
4739                         }
4740                 }
4741
4742                 if (cifs_sb->ctx->rsize == 0)
4743                         cifs_sb->ctx->rsize =
4744                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4745                                                              cifs_sb->ctx);
4746
4747                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4748                                                    &rsize, credits);
4749                 if (rc)
4750                         break;
4751                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4752                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4753
4754                 /*
4755                  * Give up immediately if rsize is too small to read an entire
4756                  * page. The VFS will fall back to readpage. We should never
4757                  * reach this point, however, since we set ra_pages to 0 when the
4758                  * rsize is smaller than a cache page.
4759                  */
4760                 if (unlikely(!nr_pages)) {
4761                         add_credits_and_wake_if(server, credits, 0);
4762                         break;
4763                 }
4764
4765                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4766                 if (!rdata) {
4767                         /* best to give up if we're out of mem */
4768                         add_credits_and_wake_if(server, credits, 0);
4769                         break;
4770                 }
4771
4772                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4773                 if (got != nr_pages) {
4774                         pr_warn("__readahead_batch() returned %u/%u\n",
4775                                 got, nr_pages);
4776                         nr_pages = got;
4777                 }
4778
4779                 rdata->nr_pages = nr_pages;
4780                 rdata->bytes    = readahead_batch_length(ractl);
4781                 rdata->cfile    = cifsFileInfo_get(open_file);
4782                 rdata->server   = server;
4783                 rdata->mapping  = ractl->mapping;
4784                 rdata->offset   = readahead_pos(ractl);
4785                 rdata->pid      = pid;
4786                 rdata->pagesz   = PAGE_SIZE;
4787                 rdata->tailsz   = PAGE_SIZE;
4788                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4789                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4790                 rdata->credits  = credits_on_stack;
4791
4792                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4793                 if (!rc) {
4794                         if (rdata->cfile->invalidHandle)
4795                                 rc = -EAGAIN;
4796                         else
4797                                 rc = server->ops->async_readv(rdata);
4798                 }
4799
4800                 if (rc) {
4801                         add_credits_and_wake_if(server, &rdata->credits, 0);
4802                         for (i = 0; i < rdata->nr_pages; i++) {
4803                                 page = rdata->pages[i];
4804                                 unlock_page(page);
4805                                 put_page(page);
4806                         }
4807                         /* Fall back to readpage in error/reconnect cases */
4808                         kref_put(&rdata->refcount, cifs_readdata_release);
4809                         break;
4810                 }
4811
4812                 kref_put(&rdata->refcount, cifs_readdata_release);
4813                 last_batch_size = nr_pages;
4814         }
4815
4816         free_xid(xid);
4817 }
4818
4819 /*
4820  * cifs_readpage_worker must be called with the page pinned
4821  */
4822 static int cifs_readpage_worker(struct file *file, struct page *page,
4823         loff_t *poffset)
4824 {
4825         char *read_data;
4826         int rc;
4827
4828         /* Is the page cached? */
4829         rc = cifs_readpage_from_fscache(file_inode(file), page);
4830         if (rc == 0)
4831                 goto read_complete;
4832
4833         read_data = kmap(page);
4834         /* for reads over a certain size we could initiate async read ahead */
4835
4836         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4837
4838         if (rc < 0)
4839                 goto io_error;
4840         else
4841                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4842
4843         /* we do not want atime to be less than mtime; that broke some apps */
4844         file_inode(file)->i_atime = current_time(file_inode(file));
4845         if (timespec64_compare(&(file_inode(file)->i_atime),
4846                                &(file_inode(file)->i_mtime)) < 0)
4847                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4849
4850         if (PAGE_SIZE > rc)
4851                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4852
4853         flush_dcache_page(page);
4854         SetPageUptodate(page);
4855
4856         /* send this page to the cache */
4857         cifs_readpage_to_fscache(file_inode(file), page);
4858
4859         rc = 0;
4860
4861 io_error:
4862         kunmap(page);
4863         unlock_page(page);
4864
4865 read_complete:
4866         return rc;
4867 }
4868
4869 static int cifs_read_folio(struct file *file, struct folio *folio)
4870 {
4871         struct page *page = &folio->page;
4872         loff_t offset = page_file_offset(page);
4873         int rc = -EACCES;
4874         unsigned int xid;
4875
4876         xid = get_xid();
4877
4878         if (file->private_data == NULL) {
4879                 rc = -EBADF;
4880                 free_xid(xid);
4881                 return rc;
4882         }
4883
4884         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4885                  page, (int)offset, (int)offset);
4886
4887         rc = cifs_readpage_worker(file, page, &offset);
4888
4889         free_xid(xid);
4890         return rc;
4891 }
4892
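/* Return 1 if at least one handle on the inode is open with FMODE_WRITE. */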
4893 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4894 {
4895         struct cifsFileInfo *open_file;
4896
4897         spin_lock(&cifs_inode->open_file_lock);
4898         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4899                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4900                         spin_unlock(&cifs_inode->open_file_lock);
4901                         return 1;
4902                 }
4903         }
4904         spin_unlock(&cifs_inode->open_file_lock);
4905         return 0;
4906 }
4907
4908 /*
4909  * We do not want to update the file size from the server for inodes open
4910  * for write, to avoid races with writepage extending the file.  In the
4911  * future we could consider allowing refreshing of the inode only on
4912  * increases in the file size, but this is tricky to do without racing
4913  * with writebehind page caching in the current Linux kernel design.
4914  */
4914 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4915 {
4916         if (!cifsInode)
4917                 return true;
4918
4919         if (is_inode_writable(cifsInode)) {
4920                 /* This inode is open for write at least once */
4921                 struct cifs_sb_info *cifs_sb;
4922
4923                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4924                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4925                         /* since there is no page cache to corrupt on
4926                            direct I/O, we can change the size safely */
4927                         return true;
4928                 }
4929
4930                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4931                         return true;
4932
4933                 return false;
4934         } else
4935                 return true;
4936 }
4937
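/*
 * ->write_begin: pin the target page and decide whether it must be read in
 * first.  The read is skipped when the page is already uptodate, when the
 * write covers the whole page, or (when holding a read oplock) when the
 * page lies beyond the EOF or the write would overwrite all existing data.
 */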
4938 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4939                         loff_t pos, unsigned len,
4940                         struct page **pagep, void **fsdata)
4941 {
4942         int oncethru = 0;
4943         pgoff_t index = pos >> PAGE_SHIFT;
4944         loff_t offset = pos & (PAGE_SIZE - 1);
4945         loff_t page_start = pos & PAGE_MASK;
4946         loff_t i_size;
4947         struct page *page;
4948         int rc = 0;
4949
4950         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4951
4952 start:
4953         page = grab_cache_page_write_begin(mapping, index);
4954         if (!page) {
4955                 rc = -ENOMEM;
4956                 goto out;
4957         }
4958
4959         if (PageUptodate(page))
4960                 goto out;
4961
4962         /*
4963          * If we write a full page it will be up to date, no need to read from
4964          * the server. If the write is short, we'll end up doing a sync write
4965          * instead.
4966          */
4967         if (len == PAGE_SIZE)
4968                 goto out;
4969
4970         /*
4971          * optimize away the read when we have an oplock, and we're not
4972          * expecting to use any of the data we'd be reading in. That
4973          * is, when the page lies beyond the EOF, or straddles the EOF
4974          * and the write will cover all of the existing data.
4975          */
4976         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4977                 i_size = i_size_read(mapping->host);
4978                 if (page_start >= i_size ||
4979                     (offset == 0 && (pos + len) >= i_size)) {
4980                         zero_user_segments(page, 0, offset,
4981                                            offset + len,
4982                                            PAGE_SIZE);
4983                         /*
4984                          * PageChecked means that the parts of the page
4985                          * to which we're not writing are considered up
4986                          * to date. Once the data is copied to the
4987                          * page, it can be set uptodate.
4988                          */
4989                         SetPageChecked(page);
4990                         goto out;
4991                 }
4992         }
4993
4994         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4995                 /*
4996                  * might as well read a page, it is fast enough. If we get
4997                  * an error, we don't need to return it. cifs_write_end will
4998                  * do a sync write instead since PG_uptodate isn't set.
4999                  */
5000                 cifs_readpage_worker(file, page, &page_start);
5001                 put_page(page);
5002                 oncethru = 1;
5003                 goto start;
5004         } else {
5005                 /* We could try using another file handle if there is one,
5006                    but how would we lock it to prevent a close of that
5007                    handle from racing with this read? In any case this page
5008                    will be written out by write_end, so this is fine. */
5009         }
5010 out:
5011         *pagep = page;
5012         return rc;
5013 }
5014
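/*
 * ->release_folio: refuse to release folios that carry private data, and
 * wait for fscache to finish with the folio first unless we were called
 * from kswapd or from an allocation context that forbids fs recursion.
 */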
5015 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
5016 {
5017         if (folio_test_private(folio))
5018                 return false;
5019         if (folio_test_fscache(folio)) {
5020                 if (current_is_kswapd() || !(gfp & __GFP_FS))
5021                         return false;
5022                 folio_wait_fscache(folio);
5023         }
5024         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
5025         return true;
5026 }
5027
5028 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
5029                                  size_t length)
5030 {
5031         folio_wait_fscache(folio);
5032 }
5033
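/*
 * ->launder_folio: synchronously write a dirty folio back before it is
 * invalidated, then wait for any fscache I/O on it to finish.
 */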
5034 static int cifs_launder_folio(struct folio *folio)
5035 {
5036         int rc = 0;
5037         loff_t range_start = folio_pos(folio);
5038         loff_t range_end = range_start + folio_size(folio);
5039         struct writeback_control wbc = {
5040                 .sync_mode = WB_SYNC_ALL,
5041                 .nr_to_write = 0,
5042                 .range_start = range_start,
5043                 .range_end = range_end,
5044         };
5045
5046         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5047
5048         if (folio_clear_dirty_for_io(folio))
5049                 rc = cifs_writepage_locked(&folio->page, &wbc);
5050
5051         folio_wait_fscache(folio);
5052         return rc;
5053 }
5054
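/*
 * Work item run when the server breaks our oplock: downgrade the cached
 * state, flush (and, if required, purge) the page cache, push cached
 * byte-range locks to the server, and acknowledge the break unless it was
 * cancelled in the meantime.
 */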
5055 void cifs_oplock_break(struct work_struct *work)
5056 {
5057         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5058                                                   oplock_break);
5059         struct inode *inode = d_inode(cfile->dentry);
5060         struct cifsInodeInfo *cinode = CIFS_I(inode);
5061         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
5062         struct TCP_Server_Info *server = tcon->ses->server;
5063         int rc = 0;
5064         bool purge_cache = false;
5065
5066         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5067                         TASK_UNINTERRUPTIBLE);
5068
5069         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5070                                       cfile->oplock_epoch, &purge_cache);
5071
5072         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5073                                                 cifs_has_mand_locks(cinode)) {
5074                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5075                          inode);
5076                 cinode->oplock = 0;
5077         }
5078
5079         if (inode && S_ISREG(inode->i_mode)) {
5080                 if (CIFS_CACHE_READ(cinode))
5081                         break_lease(inode, O_RDONLY);
5082                 else
5083                         break_lease(inode, O_WRONLY);
5084                 rc = filemap_fdatawrite(inode->i_mapping);
5085                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5086                         rc = filemap_fdatawait(inode->i_mapping);
5087                         mapping_set_error(inode->i_mapping, rc);
5088                         cifs_zap_mapping(inode);
5089                 }
5090                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5091                 if (CIFS_CACHE_WRITE(cinode))
5092                         goto oplock_break_ack;
5093         }
5094
5095         rc = cifs_push_locks(cfile);
5096         if (rc)
5097                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5098
5099 oplock_break_ack:
5100         /*
5101          * Releasing a stale oplock after a recent reconnect of the SMB
5102          * session, using a now-incorrect file handle, is not a data
5103          * integrity issue.  But do not bother sending an oplock release
5104          * if the session to the server is still disconnected, since the
5105          * oplock has already been released by the server.
5105          */
5106         if (!cfile->oplock_break_cancelled) {
5107                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
5108                                                              cinode);
5109                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5110         }
5111
5112         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
5113         cifs_done_oplock_break(cinode);
5114 }
5115
5116 /*
5117  * The presence of cifs_direct_io() in the address space ops vector
5118  * allows open() with O_DIRECT flags, which would have failed otherwise.
5119  *
5120  * In the non-cached mode (mount with cache=none), we shunt off direct
5121  * read and write requests, so this method should never be called.
5122  *
5123  * Direct I/O is not yet supported in the cached mode.
5124  */
5125 static ssize_t
5126 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5127 {
5128         /*
5129          * FIXME
5130          * Eventually need to support direct IO for non forcedirectio mounts
5131          */
5132         return -EINVAL;
5133 }
5134
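/*
 * Allow a file on a cifs mount to be used as swap space.  The file must
 * not have holes (i_blocks, in 512-byte units, must cover i_size), and
 * all swap I/O is routed through ->swap_rw via SWP_FS_OPS.
 */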
5135 static int cifs_swap_activate(struct swap_info_struct *sis,
5136                               struct file *swap_file, sector_t *span)
5137 {
5138         struct cifsFileInfo *cfile = swap_file->private_data;
5139         struct inode *inode = swap_file->f_mapping->host;
5140         unsigned long blocks;
5141         long long isize;
5142
5143         cifs_dbg(FYI, "swap activate\n");
5144
5145         if (!swap_file->f_mapping->a_ops->swap_rw)
5146                 /* Cannot support swap */
5147                 return -EINVAL;
5148
5149         spin_lock(&inode->i_lock);
5150         blocks = inode->i_blocks;
5151         isize = inode->i_size;
5152         spin_unlock(&inode->i_lock);
5153         if (blocks * 512 < isize) {
5154                 pr_warn("swap activate: swapfile has holes\n");
5155                 return -EINVAL;
5156         }
5157         *span = sis->pages;
5158
5159         pr_warn_once("Swap support over SMB3 is experimental\n");
5160
5161         /*
5162          * TODO: consider adding ACL (or documenting how) to prevent other
5163          * users (on this or other systems) from reading it
5164          */
5165
5167         /* TODO: add sk_set_memalloc(inet) or similar */
5168
5169         if (cfile)
5170                 cfile->swapfile = true;
5171         /*
5172          * TODO: Since file already open, we can't open with DENY_ALL here
5173          * but we could add call to grab a byte range lock to prevent others
5174          * from reading or writing the file
5175          */
5176
5177         sis->flags |= SWP_FS_OPS;
5178         return add_swap_extent(sis, 0, sis->max, 0);
5179 }
5180
5181 static void cifs_swap_deactivate(struct file *file)
5182 {
5183         struct cifsFileInfo *cfile = file->private_data;
5184
5185         cifs_dbg(FYI, "swap deactivate\n");
5186
5187         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5188
5189         if (cfile)
5190                 cfile->swapfile = false;
5191
5192         /* do we need to unpin (or unlock) the file? */
5193 }
5194
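/*
 * Illustrative userspace sequence (paths and sizes are hypothetical) for
 * exercising the two hooks above:
 *
 *	dd if=/dev/zero of=/mnt/smb/swapfile bs=1M count=1024
 *	mkswap /mnt/smb/swapfile
 *	swapon /mnt/smb/swapfile	# reaches cifs_swap_activate()
 *	swapoff /mnt/smb/swapfile	# reaches cifs_swap_deactivate()
 */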
5195 /*
5196  * Mark a page as having been made dirty and thus needing writeback.  We also
5197  * need to pin the cache object to write back to.
5198  */
5199 #ifdef CONFIG_CIFS_FSCACHE
5200 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5201 {
5202         return fscache_dirty_folio(mapping, folio,
5203                                         cifs_inode_cookie(mapping->host));
5204 }
5205 #else
5206 #define cifs_dirty_folio filemap_dirty_folio
5207 #endif
5208
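/*
 * Address space operations for cached mounts.  Note that ->direct_IO is
 * only present so that open() with O_DIRECT succeeds; actual uncached I/O
 * goes through the cifs_direct_* paths (see cifs_direct_readv() above).
 */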
5209 const struct address_space_operations cifs_addr_ops = {
5210         .read_folio = cifs_read_folio,
5211         .readahead = cifs_readahead,
5212         .writepage = cifs_writepage,
5213         .writepages = cifs_writepages,
5214         .write_begin = cifs_write_begin,
5215         .write_end = cifs_write_end,
5216         .dirty_folio = cifs_dirty_folio,
5217         .release_folio = cifs_release_folio,
5218         .direct_IO = cifs_direct_io,
5219         .invalidate_folio = cifs_invalidate_folio,
5220         .launder_folio = cifs_launder_folio,
5221         /*
5222          * TODO: investigate and if useful we could add a cifs_migratePage
5223          * helper (under CONFIG_MIGRATION) in the future, and also
5224          * investigate and add an is_dirty_writeback helper if needed
5225          */
5226         .swap_activate = cifs_swap_activate,
5227         .swap_deactivate = cifs_swap_deactivate,
5228 };
5229
5230 /*
5231  * cifs_readahead requires the server to support a buffer large enough to
5232  * contain the header plus one complete page of data.  Otherwise, we need
5233  * to leave cifs_readahead out of the address space operations.
5234  */
5235 const struct address_space_operations cifs_addr_ops_smallbuf = {
5236         .read_folio = cifs_read_folio,
5237         .writepage = cifs_writepage,
5238         .writepages = cifs_writepages,
5239         .write_begin = cifs_write_begin,
5240         .write_end = cifs_write_end,
5241         .dirty_folio = cifs_dirty_folio,
5242         .release_folio = cifs_release_folio,
5243         .invalidate_folio = cifs_invalidate_folio,
5244         .launder_folio = cifs_launder_folio,
5245 };