GNU Linux-libre 6.1.24-gnu
fs/cifs/file.c
// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Mark all open files on the tree connection as invalid, since they
 * were closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->ses->ses_lock);
        if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
                spin_unlock(&tcon->ses->ses_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->ses->ses_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}
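
/*
 * The function above implements a small reconnect state machine: a tcon
 * in TID_NEED_RECON moves to TID_IN_FILES_INVALIDATE while its open
 * handles are flagged invalid, then on to TID_NEED_TCON, at which point
 * the tree connection itself still has to be re-established. The
 * recheck under tc_lock avoids clobbering a status that was changed
 * concurrently while the open-file list was being walked.
 */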

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
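
/*
 * Illustrative mapping performed by cifs_convert_flags() above:
 *
 *      O_RDONLY  ->  GENERIC_READ
 *      O_WRONLY  ->  GENERIC_WRITE
 *      O_RDWR    ->  GENERIC_READ | GENERIC_WRITE
 *
 * The final return is a defensive fallback for an out-of-range O_ACCMODE
 * value; it requests an explicit set of read/write/attribute rights
 * rather than a generic right.
 */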

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
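
/*
 * Worked examples for cifs_get_disposition() (checked in the order above):
 *
 *      open(path, O_CREAT | O_EXCL)   ->  FILE_CREATE        (fail if present)
 *      open(path, O_CREAT | O_TRUNC)  ->  FILE_OVERWRITE_IF  (create or truncate)
 *      open(path, O_CREAT)            ->  FILE_OPEN_IF       (create if absent)
 *      open(path, O_TRUNC)            ->  FILE_OVERWRITE     (must already exist)
 *      open(path, O_RDWR)             ->  FILE_OPEN
 */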

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
                        struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new one as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read/write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = fid,
        };

        rc = server->ops->open(xid, &oparms, oplock, buf);
        if (rc)
                return rc;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}
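
/*
 * cifs_down_write() polls with down_write_trylock() plus a short sleep
 * instead of blocking in down_write(); presumably this sidesteps
 * lock-ordering issues with lock_sem in some callers. The 10ms retry
 * interval keeps the polling cheap at the cost of some latency.
 */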

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                                       struct tcon_link *tlink, __u32 oplock,
                                       const char *symlink_target)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        if (symlink_target) {
                cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
                if (!cfile->symlink_target) {
                        kfree(fdlocks);
                        kfree(cfile);
                        return NULL;
                }
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if it is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}
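
/*
 * Lock ordering note: cifs_new_fileinfo() above takes
 * tcon->open_file_lock before cinode->open_file_lock, the same order
 * used by _cifsFileInfo_put() below (with cfile->file_info_lock nested
 * innermost there). Keeping this order consistent is what makes the
 * refcount drop under all three locks safe.
 */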

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file->symlink_target);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload:    if true, queue the final release to a workqueue; callers on
 *              the close and oplock break paths pass false
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}
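
/*
 * Reference counting sketch: cifsFileInfo_get() bumps cfile->count
 * under file_info_lock; _cifsFileInfo_put() drops it while also holding
 * the tcon and inode open-file locks, so a racing get cannot resurrect
 * a handle that is mid-teardown. Only the final put closes the handle
 * on the wire and frees the structure, either inline or via
 * fileinfo_put_wq when @offload is set.
 */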

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        struct cifs_open_info_data data = {};

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* cannot refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
                                  xid, &data);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we could not set earlier due
                 * to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        cifs_free_open_info(&data);
        return rc;
}
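
/*
 * Open fast path: if a deferred-close handle for the same path with
 * identical open flags is still cached, cifs_open() above adopts it via
 * cifs_get_readable_path() and skips the server round trip entirely;
 * otherwise it falls back to a POSIX open (legacy unix extensions) or a
 * regular NT-style open through cifs_nt_open().
 */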

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Cannot grab the rename sem here, because various ops, including
         * some that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here; we
         * can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = &cfile->fid,
                .reconnect = true,
        };

        /*
         * Cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data; and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we cannot go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work. So, increase the ref count
                                 * to avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
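
/*
 * Deferred close in a nutshell: when the inode holds a cached RHW lease,
 * cifs_close() parks the handle on the inode's deferred-close list and
 * postpones the SMB close by the configurable closetimeo, so that a
 * quick reopen of the same file can reuse the handle (see the
 * cifs_get_readable_path() fast path in cifs_open() above).
 */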

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen persistent handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;

        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/*
 * @rw_check: CIFS_LOCK_OP (0) - lock op, CIFS_READ_OP (1) - read op,
 * CIFS_WRITE_OP (2) - write op
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
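
/*
 * Conflict rules encoded above, in order: non-overlapping ranges never
 * conflict; a lock held by the same process through the same fid is
 * benign unless it is a shared lock blocking a write check; a shared
 * request coexists with same-owner-same-fid locks and with locks of the
 * identical type; and for lock ops, two OFD locks on the same fid do
 * not conflict with each other.
 */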

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}
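
/*
 * Blocking wait in cifs_lock_add_if(): the pending lock is linked onto
 * the blocker's blist and the task sleeps on block_q. The wake-up
 * condition - lock->blist pointing back at itself - means
 * cifs_del_lock_waiters() has unlinked us, after which the whole
 * conflict scan is retried from try_again.
 */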

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
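
/*
 * Buffer sizing example for the batching above: with, say, the classic
 * 16K maxBuf, max_buf is first clamped to PAGE_SIZE, and then (note the
 * header size is deducted a second time) max_num becomes
 * (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE)
 * ranges per LOCKING_ANDX request; any further locks go out in
 * additional batches of the same size.
 */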

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};
1411
1412 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1413 static int
1414 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1415 {
1416         struct inode *inode = d_inode(cfile->dentry);
1417         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1418         struct file_lock *flock;
1419         struct file_lock_context *flctx = inode->i_flctx;
1420         unsigned int count = 0, i;
1421         int rc = 0, xid, type;
1422         struct list_head locks_to_send, *el;
1423         struct lock_to_push *lck, *tmp;
1424         __u64 length;
1425
1426         xid = get_xid();
1427
1428         if (!flctx)
1429                 goto out;
1430
1431         spin_lock(&flctx->flc_lock);
1432         list_for_each(el, &flctx->flc_posix) {
1433                 count++;
1434         }
1435         spin_unlock(&flctx->flc_lock);
1436
1437         INIT_LIST_HEAD(&locks_to_send);
1438
1439         /*
1440          * Allocating count locks is enough because no FL_POSIX locks can be
1441          * added to the list while we are holding cinode->lock_sem that
1442          * protects locking operations of this inode.
1443          */
1444         for (i = 0; i < count; i++) {
1445                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1446                 if (!lck) {
1447                         rc = -ENOMEM;
1448                         goto err_out;
1449                 }
1450                 list_add_tail(&lck->llist, &locks_to_send);
1451         }
1452
1453         el = locks_to_send.next;
1454         spin_lock(&flctx->flc_lock);
1455         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1456                 if (el == &locks_to_send) {
1457                         /*
1458                          * The list ended. We don't have enough allocated
1459                          * structures - something is really wrong.
1460                          */
1461                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1462                         break;
1463                 }
1464                 length = cifs_flock_len(flock);
1465                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1466                         type = CIFS_RDLCK;
1467                 else
1468                         type = CIFS_WRLCK;
1469                 lck = list_entry(el, struct lock_to_push, llist);
1470                 lck->pid = hash_lockowner(flock->fl_owner);
1471                 lck->netfid = cfile->fid.netfid;
1472                 lck->length = length;
1473                 lck->type = type;
1474                 lck->offset = flock->fl_start;
1475         }
1476         spin_unlock(&flctx->flc_lock);
1477
1478         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1479                 int stored_rc;
1480
1481                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1482                                              lck->offset, lck->length, NULL,
1483                                              lck->type, 0);
1484                 if (stored_rc)
1485                         rc = stored_rc;
1486                 list_del(&lck->llist);
1487                 kfree(lck);
1488         }
1489
1490 out:
1491         free_xid(xid);
1492         return rc;
1493 err_out:
1494         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1495                 list_del(&lck->llist);
1496                 kfree(lck);
1497         }
1498         goto out;
1499 }
1500 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1501
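/*
 * Push cached byte-range locks for this handle to the server: POSIX locks
 * when the unix extensions' FCNTL capability is available (and POSIX
 * brlocks are not disabled by the mount), mandatory locks via the server
 * ops otherwise. Clears can_cache_brlcks under lock_sem.
 */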
1502 static int
1503 cifs_push_locks(struct cifsFileInfo *cfile)
1504 {
1505         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1506         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1507         int rc = 0;
1508 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1509         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1510 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1511
1512         /* we are going to update can_cache_brlcks here - need write access */
1513         cifs_down_write(&cinode->lock_sem);
1514         if (!cinode->can_cache_brlcks) {
1515                 up_write(&cinode->lock_sem);
1516                 return rc;
1517         }
1518
1519 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1520         if (cap_unix(tcon->ses) &&
1521             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1522             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1523                 rc = cifs_push_posix_locks(cfile);
1524         else
1525 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1526                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1527
1528         cinode->can_cache_brlcks = false;
1529         up_write(&cinode->lock_sem);
1530         return rc;
1531 }
1532
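/*
 * Translate a VFS file_lock into the server's lock type and determine
 * whether this is a lock or unlock request and whether the caller is
 * willing to wait for the lock (FL_SLEEP).
 */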
1533 static void
1534 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1535                 bool *wait_flag, struct TCP_Server_Info *server)
1536 {
1537         if (flock->fl_flags & FL_POSIX)
1538                 cifs_dbg(FYI, "Posix\n");
1539         if (flock->fl_flags & FL_FLOCK)
1540                 cifs_dbg(FYI, "Flock\n");
1541         if (flock->fl_flags & FL_SLEEP) {
1542                 cifs_dbg(FYI, "Blocking lock\n");
1543                 *wait_flag = true;
1544         }
1545         if (flock->fl_flags & FL_ACCESS)
1546                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1547         if (flock->fl_flags & FL_LEASE)
1548                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1549         if (flock->fl_flags &
1550             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1551                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1552                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1553
1554         *type = server->vals->large_lock_type;
1555         if (flock->fl_type == F_WRLCK) {
1556                 cifs_dbg(FYI, "F_WRLCK\n");
1557                 *type |= server->vals->exclusive_lock_type;
1558                 *lock = 1;
1559         } else if (flock->fl_type == F_UNLCK) {
1560                 cifs_dbg(FYI, "F_UNLCK\n");
1561                 *type |= server->vals->unlock_lock_type;
1562                 *unlock = 1;
1563                 /* Check if unlock includes more than one lock range */
1564         } else if (flock->fl_type == F_RDLCK) {
1565                 cifs_dbg(FYI, "F_RDLCK\n");
1566                 *type |= server->vals->shared_lock_type;
1567                 *lock = 1;
1568         } else if (flock->fl_type == F_EXLCK) {
1569                 cifs_dbg(FYI, "F_EXLCK\n");
1570                 *type |= server->vals->exclusive_lock_type;
1571                 *lock = 1;
1572         } else if (flock->fl_type == F_SHLCK) {
1573                 cifs_dbg(FYI, "F_SHLCK\n");
1574                 *type |= server->vals->shared_lock_type;
1575                 *lock = 1;
1576         } else
1577                 cifs_dbg(FYI, "Unknown type of lock\n");
1578 }
1579
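/*
 * Handle F_GETLK: check for a conflicting lock, first against cached
 * locks and then on the server by trying to set (and immediately unset)
 * the lock, retrying with a shared type before giving up. If the range
 * turns out to be free, flock->fl_type is set to F_UNLCK.
 */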
1580 static int
1581 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1582            bool wait_flag, bool posix_lck, unsigned int xid)
1583 {
1584         int rc = 0;
1585         __u64 length = cifs_flock_len(flock);
1586         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1587         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1588         struct TCP_Server_Info *server = tcon->ses->server;
1589 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1590         __u16 netfid = cfile->fid.netfid;
1591
1592         if (posix_lck) {
1593                 int posix_lock_type;
1594
1595                 rc = cifs_posix_lock_test(file, flock);
1596                 if (!rc)
1597                         return rc;
1598
1599                 if (type & server->vals->shared_lock_type)
1600                         posix_lock_type = CIFS_RDLCK;
1601                 else
1602                         posix_lock_type = CIFS_WRLCK;
1603                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1604                                       hash_lockowner(flock->fl_owner),
1605                                       flock->fl_start, length, flock,
1606                                       posix_lock_type, wait_flag);
1607                 return rc;
1608         }
1609 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1610
1611         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1612         if (!rc)
1613                 return rc;
1614
1615         /* BB we could chain these into one lock request BB */
1616         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1617                                     1, 0, false);
1618         if (rc == 0) {
1619                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1620                                             type, 0, 1, false);
1621                 flock->fl_type = F_UNLCK;
1622                 if (rc != 0)
1623                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1624                                  rc);
1625                 return 0;
1626         }
1627
1628         if (type & server->vals->shared_lock_type) {
1629                 flock->fl_type = F_WRLCK;
1630                 return 0;
1631         }
1632
1633         type &= ~server->vals->exclusive_lock_type;
1634
1635         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1636                                     type | server->vals->shared_lock_type,
1637                                     1, 0, false);
1638         if (rc == 0) {
1639                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1640                         type | server->vals->shared_lock_type, 0, 1, false);
1641                 flock->fl_type = F_RDLCK;
1642                 if (rc != 0)
1643                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1644                                  rc);
1645         } else
1646                 flock->fl_type = F_WRLCK;
1647
1648         return 0;
1649 }
1650
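/*
 * Helpers for unlock handling: move all locks from one list to another,
 * and free a list of cifsLockInfo structures, waking any waiters first.
 */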
1651 void
1652 cifs_move_llist(struct list_head *source, struct list_head *dest)
1653 {
1654         struct list_head *li, *tmp;
1655         list_for_each_safe(li, tmp, source)
1656                 list_move(li, dest);
1657 }
1658
1659 void
1660 cifs_free_llist(struct list_head *llist)
1661 {
1662         struct cifsLockInfo *li, *tmp;
1663         list_for_each_entry_safe(li, tmp, llist, llist) {
1664                 cifs_del_lock_waiters(li);
1665                 list_del(&li->llist);
1666                 kfree(li);
1667         }
1668 }
1669
1670 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
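/*
 * Unlock a range on the legacy SMB1 path. Matching cached locks are moved
 * to a temporary list and sent to the server in batches of up to max_num
 * LOCKING_ANDX ranges; on failure the batch is moved back onto the file's
 * lock list, on success the temporary list is freed.
 */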
1671 int
1672 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1673                   unsigned int xid)
1674 {
1675         int rc = 0, stored_rc;
1676         static const int types[] = {
1677                 LOCKING_ANDX_LARGE_FILES,
1678                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1679         };
1680         unsigned int i;
1681         unsigned int max_num, num, max_buf;
1682         LOCKING_ANDX_RANGE *buf, *cur;
1683         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1684         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1685         struct cifsLockInfo *li, *tmp;
1686         __u64 length = cifs_flock_len(flock);
1687         struct list_head tmp_llist;
1688
1689         INIT_LIST_HEAD(&tmp_llist);
1690
1691         /*
1692          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1693          * and check it before using.
1694          */
1695         max_buf = tcon->ses->server->maxBuf;
1696         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1697                 return -EINVAL;
1698
1699         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1700                      PAGE_SIZE);
1701         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1702                         PAGE_SIZE);
1703         max_num = (max_buf - sizeof(struct smb_hdr)) /
1704                                                 sizeof(LOCKING_ANDX_RANGE);
1705         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1706         if (!buf)
1707                 return -ENOMEM;
1708
1709         cifs_down_write(&cinode->lock_sem);
1710         for (i = 0; i < 2; i++) {
1711                 cur = buf;
1712                 num = 0;
1713                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1714                         if (flock->fl_start > li->offset ||
1715                             (flock->fl_start + length) <
1716                             (li->offset + li->length))
1717                                 continue;
1718                         if (current->tgid != li->pid)
1719                                 continue;
1720                         if (types[i] != li->type)
1721                                 continue;
1722                         if (cinode->can_cache_brlcks) {
1723                                 /*
1724                                  * We can cache brlock requests - simply remove
1725                                  * a lock from the file's list.
1726                                  */
1727                                 list_del(&li->llist);
1728                                 cifs_del_lock_waiters(li);
1729                                 kfree(li);
1730                                 continue;
1731                         }
1732                         cur->Pid = cpu_to_le16(li->pid);
1733                         cur->LengthLow = cpu_to_le32((u32)li->length);
1734                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1735                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1736                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1737                         /*
1738                          * We need to save a lock here to let us add it again to
1739                          * the file's list if the unlock range request fails on
1740                          * the server.
1741                          */
1742                         list_move(&li->llist, &tmp_llist);
1743                         if (++num == max_num) {
1744                                 stored_rc = cifs_lockv(xid, tcon,
1745                                                        cfile->fid.netfid,
1746                                                        li->type, num, 0, buf);
1747                                 if (stored_rc) {
1748                                         /*
1749                                          * We failed on the unlock range
1750                                          * request - add all locks from the tmp
1751                                          * list to the head of the file's list.
1752                                          */
1753                                         cifs_move_llist(&tmp_llist,
1754                                                         &cfile->llist->locks);
1755                                         rc = stored_rc;
1756                                 } else
1757                                         /*
1758                                          * The unlock range request succeeded -
1759                                          * free the tmp list.
1760                                          */
1761                                         cifs_free_llist(&tmp_llist);
1762                                 cur = buf;
1763                                 num = 0;
1764                         } else
1765                                 cur++;
1766                 }
1767                 if (num) {
1768                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1769                                                types[i], num, 0, buf);
1770                         if (stored_rc) {
1771                                 cifs_move_llist(&tmp_llist,
1772                                                 &cfile->llist->locks);
1773                                 rc = stored_rc;
1774                         } else
1775                                 cifs_free_llist(&tmp_llist);
1776                 }
1777         }
1778
1779         up_write(&cinode->lock_sem);
1780         kfree(buf);
1781         return rc;
1782 }
1783 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1784
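/*
 * Handle lock and unlock requests: use the POSIX protocol path where the
 * unix extensions permit it; otherwise cache the lock locally while
 * caching is allowed or send a mandatory lock/unlock to the server. The
 * VFS lock lists are updated at the end via locks_lock_file_wait().
 */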
1785 static int
1786 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1787            bool wait_flag, bool posix_lck, int lock, int unlock,
1788            unsigned int xid)
1789 {
1790         int rc = 0;
1791         __u64 length = cifs_flock_len(flock);
1792         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1793         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1794         struct TCP_Server_Info *server = tcon->ses->server;
1795         struct inode *inode = d_inode(cfile->dentry);
1796
1797 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1798         if (posix_lck) {
1799                 int posix_lock_type;
1800
1801                 rc = cifs_posix_lock_set(file, flock);
1802                 if (rc <= FILE_LOCK_DEFERRED)
1803                         return rc;
1804
1805                 if (type & server->vals->shared_lock_type)
1806                         posix_lock_type = CIFS_RDLCK;
1807                 else
1808                         posix_lock_type = CIFS_WRLCK;
1809
1810                 if (unlock == 1)
1811                         posix_lock_type = CIFS_UNLCK;
1812
1813                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1814                                       hash_lockowner(flock->fl_owner),
1815                                       flock->fl_start, length,
1816                                       NULL, posix_lock_type, wait_flag);
1817                 goto out;
1818         }
1819 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1820         if (lock) {
1821                 struct cifsLockInfo *lock;
1822
1823                 lock = cifs_lock_init(flock->fl_start, length, type,
1824                                       flock->fl_flags);
1825                 if (!lock)
1826                         return -ENOMEM;
1827
1828                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1829                 if (rc < 0) {
1830                         kfree(lock);
1831                         return rc;
1832                 }
1833                 if (!rc)
1834                         goto out;
1835
1836                 /*
1837                  * Windows 7 server can delay breaking lease from read to None
1838                  * if we set a byte-range lock on a file - break it explicitly
1839                  * before sending the lock to the server to be sure the next
1840                  * read won't conflict with non-overlapping locks due to
1841                  * page reading.
1842                  */
1843                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1844                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1845                         cifs_zap_mapping(inode);
1846                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1847                                  inode);
1848                         CIFS_I(inode)->oplock = 0;
1849                 }
1850
1851                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1852                                             type, 1, 0, wait_flag);
1853                 if (rc) {
1854                         kfree(lock);
1855                         return rc;
1856                 }
1857
1858                 cifs_lock_add(cfile, lock);
1859         } else if (unlock)
1860                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1861
1862 out:
1863         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1864                 /*
1865                  * If this is a request to remove all locks because we
1866                  * are closing the file, it doesn't matter if the
1867                  * unlocking failed as both cifs.ko and the SMB server
1868                  * remove the lock on file close
1869                  */
1870                 if (rc) {
1871                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1872                         if (!(flock->fl_flags & FL_CLOSE))
1873                                 return rc;
1874                 }
1875                 rc = locks_lock_file_wait(file, flock);
1876         }
1877         return rc;
1878 }
1879
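/*
 * flock(2) entry point: only FL_FLOCK requests are accepted here; they
 * are decoded by cifs_read_flock() and applied via cifs_setlk().
 */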
1880 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1881 {
1882         int rc, xid;
1883         int lock = 0, unlock = 0;
1884         bool wait_flag = false;
1885         bool posix_lck = false;
1886         struct cifs_sb_info *cifs_sb;
1887         struct cifs_tcon *tcon;
1888         struct cifsFileInfo *cfile;
1889         __u32 type;
1890
1891         xid = get_xid();
1892
1893         if (!(fl->fl_flags & FL_FLOCK)) {
1894                 rc = -ENOLCK;
1895                 free_xid(xid);
1896                 return rc;
1897         }
1898
1899         cfile = (struct cifsFileInfo *)file->private_data;
1900         tcon = tlink_tcon(cfile->tlink);
1901
1902         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1903                         tcon->ses->server);
1904         cifs_sb = CIFS_FILE_SB(file);
1905
1906         if (cap_unix(tcon->ses) &&
1907             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1908             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1909                 posix_lck = true;
1910
1911         if (!lock && !unlock) {
1912                 /*
1913                  * if this is neither a lock nor an unlock request, there is
1914                  * nothing to do since we do not know what it is
1915                  */
1916                 rc = -EOPNOTSUPP;
1917                 free_xid(xid);
1918                 return rc;
1919         }
1920
1921         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1922                         xid);
1923         free_xid(xid);
1924         return rc;
1927 }
1928
1929 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1930 {
1931         int rc, xid;
1932         int lock = 0, unlock = 0;
1933         bool wait_flag = false;
1934         bool posix_lck = false;
1935         struct cifs_sb_info *cifs_sb;
1936         struct cifs_tcon *tcon;
1937         struct cifsFileInfo *cfile;
1938         __u32 type;
1939
1940         rc = -EACCES;
1941         xid = get_xid();
1942
1943         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
1944                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
1945                  (long long)flock->fl_end);
1946
1947         cfile = (struct cifsFileInfo *)file->private_data;
1948         tcon = tlink_tcon(cfile->tlink);
1949
1950         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1951                         tcon->ses->server);
1952         cifs_sb = CIFS_FILE_SB(file);
1953         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1954
1955         if (cap_unix(tcon->ses) &&
1956             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1957             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1958                 posix_lck = true;
1959         /*
1960          * BB add code here to normalize offset and length to account for
1961          * negative length which we cannot accept over the wire.
1962          */
1963         if (IS_GETLK(cmd)) {
1964                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1965                 free_xid(xid);
1966                 return rc;
1967         }
1968
1969         if (!lock && !unlock) {
1970                 /*
1971                  * if this is neither a lock nor an unlock request, there is
1972                  * nothing to do since we do not know what it is
1973                  */
1974                 free_xid(xid);
1975                 return -EOPNOTSUPP;
1976         }
1977
1978         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1979                         xid);
1980         free_xid(xid);
1981         return rc;
1982 }
1983
1984 /*
1985  * update the file size (if needed) after a write. Should be called with
1986  * the inode->i_lock held
1987  */
1988 void
1989 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1990                       unsigned int bytes_written)
1991 {
1992         loff_t end_of_write = offset + bytes_written;
1993
1994         if (end_of_write > cifsi->server_eof)
1995                 cifsi->server_eof = end_of_write;
1996 }
1997
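/*
 * Synchronously write data at *offset, looping until write_size bytes
 * have been sent, reopening an invalidated handle and retrying on
 * -EAGAIN. Updates the cached EOF and i_size and returns the number of
 * bytes written, or an error if nothing could be written.
 */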
1998 static ssize_t
1999 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2000            size_t write_size, loff_t *offset)
2001 {
2002         int rc = 0;
2003         unsigned int bytes_written = 0;
2004         unsigned int total_written;
2005         struct cifs_tcon *tcon;
2006         struct TCP_Server_Info *server;
2007         unsigned int xid;
2008         struct dentry *dentry = open_file->dentry;
2009         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2010         struct cifs_io_parms io_parms = {0};
2011
2012         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2013                  write_size, *offset, dentry);
2014
2015         tcon = tlink_tcon(open_file->tlink);
2016         server = tcon->ses->server;
2017
2018         if (!server->ops->sync_write)
2019                 return -ENOSYS;
2020
2021         xid = get_xid();
2022
2023         for (total_written = 0; write_size > total_written;
2024              total_written += bytes_written) {
2025                 rc = -EAGAIN;
2026                 while (rc == -EAGAIN) {
2027                         struct kvec iov[2];
2028                         unsigned int len;
2029
2030                         if (open_file->invalidHandle) {
2031                                 /* we could deadlock if we called
2032                                    filemap_fdatawait from here so tell
2033                                    reopen_file not to flush data to
2034                                    server now */
2035                                 rc = cifs_reopen_file(open_file, false);
2036                                 if (rc != 0)
2037                                         break;
2038                         }
2039
2040                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2041                                   (unsigned int)write_size - total_written);
2042                         /* iov[0] is reserved for smb header */
2043                         iov[1].iov_base = (char *)write_data + total_written;
2044                         iov[1].iov_len = len;
2045                         io_parms.pid = pid;
2046                         io_parms.tcon = tcon;
2047                         io_parms.offset = *offset;
2048                         io_parms.length = len;
2049                         rc = server->ops->sync_write(xid, &open_file->fid,
2050                                         &io_parms, &bytes_written, iov, 1);
2051                 }
2052                 if (rc || (bytes_written == 0)) {
2053                         if (total_written)
2054                                 break;
2055                         else {
2056                                 free_xid(xid);
2057                                 return rc;
2058                         }
2059                 } else {
2060                         spin_lock(&d_inode(dentry)->i_lock);
2061                         cifs_update_eof(cifsi, *offset, bytes_written);
2062                         spin_unlock(&d_inode(dentry)->i_lock);
2063                         *offset += bytes_written;
2064                 }
2065         }
2066
2067         cifs_stats_bytes_written(tcon, total_written);
2068
2069         if (total_written > 0) {
2070                 spin_lock(&d_inode(dentry)->i_lock);
2071                 if (*offset > d_inode(dentry)->i_size) {
2072                         i_size_write(d_inode(dentry), *offset);
2073                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2074                 }
2075                 spin_unlock(&d_inode(dentry)->i_lock);
2076         }
2077         mark_inode_dirty_sync(d_inode(dentry));
2078         free_xid(xid);
2079         return total_written;
2080 }
2081
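/*
 * Find an open handle on this inode usable for reading, optionally
 * restricted to handles opened by the current fsuid on multiuser mounts.
 * Takes a reference on the returned handle.
 */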
2082 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2083                                         bool fsuid_only)
2084 {
2085         struct cifsFileInfo *open_file = NULL;
2086         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2087
2088         /* only filter by fsuid on multiuser mounts */
2089         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2090                 fsuid_only = false;
2091
2092         spin_lock(&cifs_inode->open_file_lock);
2093         /* we could simply take the first list entry since write-only entries
2094            are always at the end of the list, but since the first entry might
2095            have a close pending, we go through the whole list */
2096         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2097                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2098                         continue;
2099                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2100                         if (!open_file->invalidHandle) {
2101                                 /* found a good file */
2102                                 /* lock it so it will not be closed on us */
2103                                 cifsFileInfo_get(open_file);
2104                                 spin_unlock(&cifs_inode->open_file_lock);
2105                                 return open_file;
2106                         } /* else might as well continue, and look for
2107                              another, or simply have the caller reopen it
2108                              again rather than trying to fix this handle */
2109                 } else /* write only file */
2110                         break; /* write only files are last so must be done */
2111         }
2112         spin_unlock(&cifs_inode->open_file_lock);
2113         return NULL;
2114 }
2115
2116 /* Return -EBADF if no handle is found and general rc otherwise */
2117 int
2118 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2119                        struct cifsFileInfo **ret_file)
2120 {
2121         struct cifsFileInfo *open_file, *inv_file = NULL;
2122         struct cifs_sb_info *cifs_sb;
2123         bool any_available = false;
2124         int rc = -EBADF;
2125         unsigned int refind = 0;
2126         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2127         bool with_delete = flags & FIND_WR_WITH_DELETE;
2128         *ret_file = NULL;
2129
2130         /*
2131          * Having a null inode here (because mapping->host was set to zero by
2132          * the VFS or MM) should not happen but we had reports of an oops (due
2133          * to it being zero) during stress test cases so we need to check for it
2134          */
2135
2136         if (cifs_inode == NULL) {
2137                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2138                 dump_stack();
2139                 return rc;
2140         }
2141
2142         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2143
2144         /* only filter by fsuid on multiuser mounts */
2145         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2146                 fsuid_only = false;
2147
2148         spin_lock(&cifs_inode->open_file_lock);
2149 refind_writable:
2150         if (refind > MAX_REOPEN_ATT) {
2151                 spin_unlock(&cifs_inode->open_file_lock);
2152                 return rc;
2153         }
2154         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2155                 if (!any_available && open_file->pid != current->tgid)
2156                         continue;
2157                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2158                         continue;
2159                 if (with_delete && !(open_file->fid.access & DELETE))
2160                         continue;
2161                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2162                         if (!open_file->invalidHandle) {
2163                                 /* found a good writable file */
2164                                 cifsFileInfo_get(open_file);
2165                                 spin_unlock(&cifs_inode->open_file_lock);
2166                                 *ret_file = open_file;
2167                                 return 0;
2168                         } else {
2169                                 if (!inv_file)
2170                                         inv_file = open_file;
2171                         }
2172                 }
2173         }
2174         /* couldn't find a usable FH with the same pid, try any available */
2175         if (!any_available) {
2176                 any_available = true;
2177                 goto refind_writable;
2178         }
2179
2180         if (inv_file) {
2181                 any_available = false;
2182                 cifsFileInfo_get(inv_file);
2183         }
2184
2185         spin_unlock(&cifs_inode->open_file_lock);
2186
2187         if (inv_file) {
2188                 rc = cifs_reopen_file(inv_file, false);
2189                 if (!rc) {
2190                         *ret_file = inv_file;
2191                         return 0;
2192                 }
2193
2194                 spin_lock(&cifs_inode->open_file_lock);
2195                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2196                 spin_unlock(&cifs_inode->open_file_lock);
2197                 cifsFileInfo_put(inv_file);
2198                 ++refind;
2199                 inv_file = NULL;
2200                 spin_lock(&cifs_inode->open_file_lock);
2201                 goto refind_writable;
2202         }
2203
2204         return rc;
2205 }
2206
2207 struct cifsFileInfo *
2208 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2209 {
2210         struct cifsFileInfo *cfile;
2211         int rc;
2212
2213         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2214         if (rc)
2215                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2216
2217         return cfile;
2218 }
2219
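/*
 * Find a writable handle by path name: walk the tcon's open file list,
 * compare each handle's dentry path with @name, and on a match hand off
 * to cifs_get_writable_file() for that inode.
 */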
2220 int
2221 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2222                        int flags,
2223                        struct cifsFileInfo **ret_file)
2224 {
2225         struct cifsFileInfo *cfile;
2226         void *page = alloc_dentry_path();
2227
2228         *ret_file = NULL;
2229
2230         spin_lock(&tcon->open_file_lock);
2231         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2232                 struct cifsInodeInfo *cinode;
2233                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2234                 if (IS_ERR(full_path)) {
2235                         spin_unlock(&tcon->open_file_lock);
2236                         free_dentry_path(page);
2237                         return PTR_ERR(full_path);
2238                 }
2239                 if (strcmp(full_path, name))
2240                         continue;
2241
2242                 cinode = CIFS_I(d_inode(cfile->dentry));
2243                 spin_unlock(&tcon->open_file_lock);
2244                 free_dentry_path(page);
2245                 return cifs_get_writable_file(cinode, flags, ret_file);
2246         }
2247
2248         spin_unlock(&tcon->open_file_lock);
2249         free_dentry_path(page);
2250         return -ENOENT;
2251 }
2252
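/*
 * As above, but look for a readable handle: returns 0 with *ret_file set
 * on success, -ENOENT if no matching open file is found.
 */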
2253 int
2254 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2255                        struct cifsFileInfo **ret_file)
2256 {
2257         struct cifsFileInfo *cfile;
2258         void *page = alloc_dentry_path();
2259
2260         *ret_file = NULL;
2261
2262         spin_lock(&tcon->open_file_lock);
2263         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2264                 struct cifsInodeInfo *cinode;
2265                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2266                 if (IS_ERR(full_path)) {
2267                         spin_unlock(&tcon->open_file_lock);
2268                         free_dentry_path(page);
2269                         return PTR_ERR(full_path);
2270                 }
2271                 if (strcmp(full_path, name))
2272                         continue;
2273
2274                 cinode = CIFS_I(d_inode(cfile->dentry));
2275                 spin_unlock(&tcon->open_file_lock);
2276                 free_dentry_path(page);
2277                 *ret_file = find_readable_file(cinode, 0);
2278                 return *ret_file ? 0 : -ENOENT;
2279         }
2280
2281         spin_unlock(&tcon->open_file_lock);
2282         free_dentry_path(page);
2283         return -ENOENT;
2284 }
2285
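/*
 * Final release of a cifs_writedata once its refcount drops to zero:
 * deregister any smbdirect memory registration, drop the reference on the
 * file handle and free the page array and the structure itself.
 */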
2286 void
2287 cifs_writedata_release(struct kref *refcount)
2288 {
2289         struct cifs_writedata *wdata = container_of(refcount,
2290                                         struct cifs_writedata, refcount);
2291 #ifdef CONFIG_CIFS_SMB_DIRECT
2292         if (wdata->mr) {
2293                 smbd_deregister_mr(wdata->mr);
2294                 wdata->mr = NULL;
2295         }
2296 #endif
2297
2298         if (wdata->cfile)
2299                 cifsFileInfo_put(wdata->cfile);
2300
2301         kvfree(wdata->pages);
2302         kfree(wdata);
2303 }
2304
2305 /*
2306  * Write failed with a retryable error. Resend the write request. It's also
2307  * possible that the page was redirtied so re-clean the page.
2308  */
2309 static void
2310 cifs_writev_requeue(struct cifs_writedata *wdata)
2311 {
2312         int i, rc = 0;
2313         struct inode *inode = d_inode(wdata->cfile->dentry);
2314         struct TCP_Server_Info *server;
2315         unsigned int rest_len;
2316
2317         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2318         i = 0;
2319         rest_len = wdata->bytes;
2320         do {
2321                 struct cifs_writedata *wdata2;
2322                 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2323
2324                 wsize = server->ops->wp_retry_size(inode);
2325                 if (wsize < rest_len) {
2326                         nr_pages = wsize / PAGE_SIZE;
2327                         if (!nr_pages) {
2328                                 rc = -EOPNOTSUPP;
2329                                 break;
2330                         }
2331                         cur_len = nr_pages * PAGE_SIZE;
2332                         tailsz = PAGE_SIZE;
2333                 } else {
2334                         nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2335                         cur_len = rest_len;
2336                         tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2337                 }
2338
2339                 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2340                 if (!wdata2) {
2341                         rc = -ENOMEM;
2342                         break;
2343                 }
2344
2345                 for (j = 0; j < nr_pages; j++) {
2346                         wdata2->pages[j] = wdata->pages[i + j];
2347                         lock_page(wdata2->pages[j]);
2348                         clear_page_dirty_for_io(wdata2->pages[j]);
2349                 }
2350
2351                 wdata2->sync_mode = wdata->sync_mode;
2352                 wdata2->nr_pages = nr_pages;
2353                 wdata2->offset = page_offset(wdata2->pages[0]);
2354                 wdata2->pagesz = PAGE_SIZE;
2355                 wdata2->tailsz = tailsz;
2356                 wdata2->bytes = cur_len;
2357
2358                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2359                                             &wdata2->cfile);
2360                 if (!wdata2->cfile) {
2361                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2362                                  rc);
2363                         if (!is_retryable_error(rc))
2364                                 rc = -EBADF;
2365                 } else {
2366                         wdata2->pid = wdata2->cfile->pid;
2367                         rc = server->ops->async_writev(wdata2,
2368                                                        cifs_writedata_release);
2369                 }
2370
2371                 for (j = 0; j < nr_pages; j++) {
2372                         unlock_page(wdata2->pages[j]);
2373                         if (rc != 0 && !is_retryable_error(rc)) {
2374                                 SetPageError(wdata2->pages[j]);
2375                                 end_page_writeback(wdata2->pages[j]);
2376                                 put_page(wdata2->pages[j]);
2377                         }
2378                 }
2379
2380                 kref_put(&wdata2->refcount, cifs_writedata_release);
2381                 if (rc) {
2382                         if (is_retryable_error(rc))
2383                                 continue;
2384                         i += nr_pages;
2385                         break;
2386                 }
2387
2388                 rest_len -= cur_len;
2389                 i += nr_pages;
2390         } while (i < wdata->nr_pages);
2391
2392         /* cleanup remaining pages from the original wdata */
2393         for (; i < wdata->nr_pages; i++) {
2394                 SetPageError(wdata->pages[i]);
2395                 end_page_writeback(wdata->pages[i]);
2396                 put_page(wdata->pages[i]);
2397         }
2398
2399         if (rc != 0 && !is_retryable_error(rc))
2400                 mapping_set_error(inode->i_mapping, rc);
2401         kref_put(&wdata->refcount, cifs_writedata_release);
2402 }
2403
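/*
 * Completion work for an async write: on success, update the cached EOF
 * and write statistics; requeue a WB_SYNC_ALL write that failed with
 * -EAGAIN; otherwise redirty or error the pages, end writeback on them
 * and release them.
 */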
2404 void
2405 cifs_writev_complete(struct work_struct *work)
2406 {
2407         struct cifs_writedata *wdata = container_of(work,
2408                                                 struct cifs_writedata, work);
2409         struct inode *inode = d_inode(wdata->cfile->dentry);
2410         int i = 0;
2411
2412         if (wdata->result == 0) {
2413                 spin_lock(&inode->i_lock);
2414                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2415                 spin_unlock(&inode->i_lock);
2416                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2417                                          wdata->bytes);
2418         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2419                 return cifs_writev_requeue(wdata);
2420
2421         for (i = 0; i < wdata->nr_pages; i++) {
2422                 struct page *page = wdata->pages[i];
2423
2424                 if (wdata->result == -EAGAIN)
2425                         __set_page_dirty_nobuffers(page);
2426                 else if (wdata->result < 0)
2427                         SetPageError(page);
2428                 end_page_writeback(page);
2429                 cifs_readpage_to_fscache(inode, page);
2430                 put_page(page);
2431         }
2432         if (wdata->result != -EAGAIN)
2433                 mapping_set_error(inode->i_mapping, wdata->result);
2434         kref_put(&wdata->refcount, cifs_writedata_release);
2435 }
2436
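/*
 * Allocate a cifs_writedata along with a page pointer array sized for
 * nr_pages; cifs_writedata_direct_alloc() below takes a caller-provided
 * page array instead.
 */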
2437 struct cifs_writedata *
2438 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2439 {
2440         struct cifs_writedata *writedata = NULL;
2441         struct page **pages =
2442                 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2443         if (pages) {
2444                 writedata = cifs_writedata_direct_alloc(pages, complete);
2445                 if (!writedata)
2446                         kvfree(pages);
2447         }
2448
2449         return writedata;
2450 }
2451
2452 struct cifs_writedata *
2453 cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2454 {
2455         struct cifs_writedata *wdata;
2456
2457         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2458         if (wdata != NULL) {
2459                 wdata->pages = pages;
2460                 kref_init(&wdata->refcount);
2461                 INIT_LIST_HEAD(&wdata->list);
2462                 init_completion(&wdata->done);
2463                 INIT_WORK(&wdata->work, complete);
2464         }
2465         return wdata;
2466 }
2467
2468
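/*
 * Write the byte range [from, to) of a locked page back to the server
 * using any writable handle for the inode. The range is clamped to
 * i_size so writeback never extends the file, and writes entirely past
 * EOF (e.g. racing with truncate) are quietly dropped.
 */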
2469 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2470 {
2471         struct address_space *mapping = page->mapping;
2472         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2473         char *write_data;
2474         int rc = -EFAULT;
2475         int bytes_written = 0;
2476         struct inode *inode;
2477         struct cifsFileInfo *open_file;
2478
2479         if (!mapping || !mapping->host)
2480                 return -EFAULT;
2481
2482         inode = page->mapping->host;
2483
2484         offset += (loff_t)from;
2485         write_data = kmap(page);
2486         write_data += from;
2487
2488         if ((to > PAGE_SIZE) || (from > to)) {
2489                 kunmap(page);
2490                 return -EIO;
2491         }
2492
2493         /* racing with truncate? */
2494         if (offset > mapping->host->i_size) {
2495                 kunmap(page);
2496                 return 0; /* don't care */
2497         }
2498
2499         /* check to make sure that we are not extending the file */
2500         if (mapping->host->i_size - offset < (loff_t)to)
2501                 to = (unsigned)(mapping->host->i_size - offset);
2502
2503         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2504                                     &open_file);
2505         if (!rc) {
2506                 bytes_written = cifs_write(open_file, open_file->pid,
2507                                            write_data, to - from, &offset);
2508                 cifsFileInfo_put(open_file);
2509                 /* Does mm or vfs already set times? */
2510                 inode->i_atime = inode->i_mtime = current_time(inode);
2511                 if ((bytes_written > 0) && (offset))
2512                         rc = 0;
2513                 else if (bytes_written < 0)
2514                         rc = bytes_written;
2515                 else
2516                         rc = -EFAULT;
2517         } else {
2518                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2519                 if (!is_retryable_error(rc))
2520                         rc = -EIO;
2521         }
2522
2523         kunmap(page);
2524         return rc;
2525 }
2526
2527 static struct cifs_writedata *
2528 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2529                           pgoff_t end, pgoff_t *index,
2530                           unsigned int *found_pages)
2531 {
2532         struct cifs_writedata *wdata;
2533
2534         wdata = cifs_writedata_alloc((unsigned int)tofind,
2535                                      cifs_writev_complete);
2536         if (!wdata)
2537                 return NULL;
2538
2539         *found_pages = find_get_pages_range_tag(mapping, index, end,
2540                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2541         return wdata;
2542 }
2543
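/*
 * From the dirty pages found above, lock and keep only a run of
 * consecutive pages that are still mapped, within the writeback range
 * and not already under writeback, switching each kept page from dirty
 * to writeback. The remaining pages are released.
 */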
2544 static unsigned int
2545 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2546                     struct address_space *mapping,
2547                     struct writeback_control *wbc,
2548                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2549 {
2550         unsigned int nr_pages = 0, i;
2551         struct page *page;
2552
2553         for (i = 0; i < found_pages; i++) {
2554                 page = wdata->pages[i];
2555                 /*
2556                  * At this point we hold neither the i_pages lock nor the
2557                  * page lock: the page may be truncated or invalidated
2558                  * (changing page->mapping to NULL), or even swizzled
2559                  * back from swapper_space to tmpfs file mapping
2560                  */
2561
2562                 if (nr_pages == 0)
2563                         lock_page(page);
2564                 else if (!trylock_page(page))
2565                         break;
2566
2567                 if (unlikely(page->mapping != mapping)) {
2568                         unlock_page(page);
2569                         break;
2570                 }
2571
2572                 if (!wbc->range_cyclic && page->index > end) {
2573                         *done = true;
2574                         unlock_page(page);
2575                         break;
2576                 }
2577
2578                 if (*next && (page->index != *next)) {
2579                         /* Not next consecutive page */
2580                         unlock_page(page);
2581                         break;
2582                 }
2583
2584                 if (wbc->sync_mode != WB_SYNC_NONE)
2585                         wait_on_page_writeback(page);
2586
2587                 if (PageWriteback(page) ||
2588                                 !clear_page_dirty_for_io(page)) {
2589                         unlock_page(page);
2590                         break;
2591                 }
2592
2593                 /*
2594                  * This actually clears the dirty bit in the radix tree.
2595                  * See cifs_writepage() for more commentary.
2596                  */
2597                 set_page_writeback(page);
2598                 if (page_offset(page) >= i_size_read(mapping->host)) {
2599                         *done = true;
2600                         unlock_page(page);
2601                         end_page_writeback(page);
2602                         break;
2603                 }
2604
2605                 wdata->pages[i] = page;
2606                 *next = page->index + 1;
2607                 ++nr_pages;
2608         }
2609
2610         /* reset index to refind any pages skipped */
2611         if (nr_pages == 0)
2612                 *index = wdata->pages[0]->index + 1;
2613
2614         /* put any pages we aren't going to use */
2615         for (i = nr_pages; i < found_pages; i++) {
2616                 put_page(wdata->pages[i]);
2617                 wdata->pages[i] = NULL;
2618         }
2619
2620         return nr_pages;
2621 }
2622
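/*
 * Fill in the remaining fields (offset, sizes, pid) for a prepared batch
 * of pages and issue the async write, after adjusting the reserved
 * credits to the number of bytes actually being sent.
 */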
2623 static int
2624 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2625                  struct address_space *mapping, struct writeback_control *wbc)
2626 {
2627         int rc;
2628
2629         wdata->sync_mode = wbc->sync_mode;
2630         wdata->nr_pages = nr_pages;
2631         wdata->offset = page_offset(wdata->pages[0]);
2632         wdata->pagesz = PAGE_SIZE;
2633         wdata->tailsz = min(i_size_read(mapping->host) -
2634                         page_offset(wdata->pages[nr_pages - 1]),
2635                         (loff_t)PAGE_SIZE);
2636         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2637         wdata->pid = wdata->cfile->pid;
2638
2639         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2640         if (rc)
2641                 return rc;
2642
2643         if (wdata->cfile->invalidHandle)
2644                 rc = -EAGAIN;
2645         else
2646                 rc = wdata->server->ops->async_writev(wdata,
2647                                                       cifs_writedata_release);
2648
2649         return rc;
2650 }
2651
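/*
 * ->writepages: repeatedly gather up to wsize worth of contiguous dirty
 * pages, pair them with a writable handle and server credits, and send
 * them as a single async write; retries on -EAGAIN for data-integrity
 * syncs and wraps around once for cyclic writeback.
 */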
2652 static int cifs_writepages(struct address_space *mapping,
2653                            struct writeback_control *wbc)
2654 {
2655         struct inode *inode = mapping->host;
2656         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2657         struct TCP_Server_Info *server;
2658         bool done = false, scanned = false, range_whole = false;
2659         pgoff_t end, index;
2660         struct cifs_writedata *wdata;
2661         struct cifsFileInfo *cfile = NULL;
2662         int rc = 0;
2663         int saved_rc = 0;
2664         unsigned int xid;
2665
2666         /*
2667          * If wsize is smaller than the page cache size, default to writing
2668          * one page at a time via cifs_writepage
2669          */
2670         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2671                 return generic_writepages(mapping, wbc);
2672
2673         xid = get_xid();
2674         if (wbc->range_cyclic) {
2675                 index = mapping->writeback_index; /* Start from prev offset */
2676                 end = -1;
2677         } else {
2678                 index = wbc->range_start >> PAGE_SHIFT;
2679                 end = wbc->range_end >> PAGE_SHIFT;
2680                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2681                         range_whole = true;
2682                 scanned = true;
2683         }
2684         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2685
2686 retry:
2687         while (!done && index <= end) {
2688                 unsigned int i, nr_pages, found_pages, wsize;
2689                 pgoff_t next = 0, tofind, saved_index = index;
2690                 struct cifs_credits credits_on_stack;
2691                 struct cifs_credits *credits = &credits_on_stack;
2692                 int get_file_rc = 0;
2693
2694                 if (cfile)
2695                         cifsFileInfo_put(cfile);
2696
2697                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2698
2699                 /* in case of an error store it to return later */
2700                 if (rc)
2701                         get_file_rc = rc;
2702
2703                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2704                                                    &wsize, credits);
2705                 if (rc != 0) {
2706                         done = true;
2707                         break;
2708                 }
2709
2710                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2711
2712                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2713                                                   &found_pages);
2714                 if (!wdata) {
2715                         rc = -ENOMEM;
2716                         done = true;
2717                         add_credits_and_wake_if(server, credits, 0);
2718                         break;
2719                 }
2720
2721                 if (found_pages == 0) {
2722                         kref_put(&wdata->refcount, cifs_writedata_release);
2723                         add_credits_and_wake_if(server, credits, 0);
2724                         break;
2725                 }
2726
2727                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2728                                                end, &index, &next, &done);
2729
2730                 /* nothing to write? */
2731                 if (nr_pages == 0) {
2732                         kref_put(&wdata->refcount, cifs_writedata_release);
2733                         add_credits_and_wake_if(server, credits, 0);
2734                         continue;
2735                 }
2736
2737                 wdata->credits = credits_on_stack;
2738                 wdata->cfile = cfile;
2739                 wdata->server = server;
2740                 cfile = NULL;
2741
2742                 if (!wdata->cfile) {
2743                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2744                                  get_file_rc);
2745                         if (is_retryable_error(get_file_rc))
2746                                 rc = get_file_rc;
2747                         else
2748                                 rc = -EBADF;
2749                 } else
2750                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2751
2752                 for (i = 0; i < nr_pages; ++i)
2753                         unlock_page(wdata->pages[i]);
2754
2755                 /* send failure -- clean up the mess */
2756                 if (rc != 0) {
2757                         add_credits_and_wake_if(server, &wdata->credits, 0);
2758                         for (i = 0; i < nr_pages; ++i) {
2759                                 if (is_retryable_error(rc))
2760                                         redirty_page_for_writepage(wbc,
2761                                                            wdata->pages[i]);
2762                                 else
2763                                         SetPageError(wdata->pages[i]);
2764                                 end_page_writeback(wdata->pages[i]);
2765                                 put_page(wdata->pages[i]);
2766                         }
2767                         if (!is_retryable_error(rc))
2768                                 mapping_set_error(mapping, rc);
2769                 }
2770                 kref_put(&wdata->refcount, cifs_writedata_release);
2771
2772                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2773                         index = saved_index;
2774                         continue;
2775                 }
2776
2777                 /* Return immediately if we received a signal during writing */
2778                 if (is_interrupt_error(rc)) {
2779                         done = true;
2780                         break;
2781                 }
2782
2783                 if (rc != 0 && saved_rc == 0)
2784                         saved_rc = rc;
2785
2786                 wbc->nr_to_write -= nr_pages;
2787                 if (wbc->nr_to_write <= 0)
2788                         done = true;
2789
2790                 index = next;
2791         }
2792
2793         if (!scanned && !done) {
2794                 /*
2795                  * We hit the last page and there is more work to be done: wrap
2796                  * back to the start of the file
2797                  */
2798                 scanned = true;
2799                 index = 0;
2800                 goto retry;
2801         }
2802
2803         if (saved_rc != 0)
2804                 rc = saved_rc;
2805
2806         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2807                 mapping->writeback_index = index;
2808
2809         if (cfile)
2810                 cifsFileInfo_put(cfile);
2811         free_xid(xid);
2812         /* Indicate that ctime and mtime should be updated, as close is deferred */
2813         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2814         return rc;
2815 }
2816
2817 static int
2818 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2819 {
2820         int rc;
2821         unsigned int xid;
2822
2823         xid = get_xid();
2824 /* BB add check for wbc flags */
2825         get_page(page);
2826         if (!PageUptodate(page))
2827                 cifs_dbg(FYI, "ppw - page not up to date\n");
2828
2829         /*
2830          * Set the "writeback" flag, and clear "dirty" in the page cache.
2831          *
2832          * A writepage() implementation always needs to do either this,
2833          * or re-dirty the page with "redirty_page_for_writepage()" in
2834          * the case of a failure.
2835          *
2836          * Just unlocking the page would leave the page cache tag-bits
2837          * out of sync with the actual state of the page.
2838          */
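        /*
         * Illustrative sketch (not part of the original file) of the
         * contract described above, for a hypothetical filesystem "foo";
         * foo_send_page() is a made-up transport helper:
         *
         *      static int foo_writepage(struct page *page,
         *                               struct writeback_control *wbc)
         *      {
         *              int rc;
         *
         *              set_page_writeback(page);
         *              rc = foo_send_page(page);
         *              if (rc)
         *                      redirty_page_for_writepage(wbc, page);
         *              end_page_writeback(page);
         *              return rc;
         *      }
         */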
2839         set_page_writeback(page);
2840 retry_write:
2841         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2842         if (is_retryable_error(rc)) {
2843                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2844                         goto retry_write;
2845                 redirty_page_for_writepage(wbc, page);
2846         } else if (rc != 0) {
2847                 SetPageError(page);
2848                 mapping_set_error(page->mapping, rc);
2849         } else {
2850                 SetPageUptodate(page);
2851         }
2852         end_page_writeback(page);
2853         put_page(page);
2854         free_xid(xid);
2855         return rc;
2856 }
2857
2858 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2859 {
2860         int rc = cifs_writepage_locked(page, wbc);
2861         unlock_page(page);
2862         return rc;
2863 }
2864
2865 static int cifs_write_end(struct file *file, struct address_space *mapping,
2866                         loff_t pos, unsigned len, unsigned copied,
2867                         struct page *page, void *fsdata)
2868 {
2869         int rc;
2870         struct inode *inode = mapping->host;
2871         struct cifsFileInfo *cfile = file->private_data;
2872         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2873         __u32 pid;
2874
2875         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2876                 pid = cfile->pid;
2877         else
2878                 pid = current->tgid;
2879
2880         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2881                  page, pos, copied);
2882
2883         if (PageChecked(page)) {
2884                 if (copied == len)
2885                         SetPageUptodate(page);
2886                 ClearPageChecked(page);
2887         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2888                 SetPageUptodate(page);
2889
2890         if (!PageUptodate(page)) {
2891                 char *page_data;
2892                 unsigned offset = pos & (PAGE_SIZE - 1);
2893                 unsigned int xid;
2894
2895                 xid = get_xid();
2896                 /* calling cifs_write() here is probably better than calling
2897                    cifs_partialpagewrite() directly, since the file handle is
2898                    already known here and we might as well leverage it */
2899                 /* BB check if anything else is missing from ppw,
2900                    such as updating the last write time */
2901                 page_data = kmap(page);
2902                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2903                 /* if (rc < 0) should we set writebehind rc? */
2904                 kunmap(page);
2905
2906                 free_xid(xid);
2907         } else {
2908                 rc = copied;
2909                 pos += copied;
2910                 set_page_dirty(page);
2911         }
2912
2913         if (rc > 0) {
2914                 spin_lock(&inode->i_lock);
2915                 if (pos > inode->i_size) {
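                        /*
                         * Illustrative note (not part of the original file):
                         * i_blocks counts 512-byte sectors, rounded up;
                         * e.g. pos = 1000 yields (511 + 1000) >> 9 = 2.
                         */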
2916                         i_size_write(inode, pos);
2917                         inode->i_blocks = (512 - 1 + pos) >> 9;
2918                 }
2919                 spin_unlock(&inode->i_lock);
2920         }
2921
2922         unlock_page(page);
2923         put_page(page);
2924         /* Indicate that ctime and mtime should be updated, as close is deferred */
2925         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2926
2927         return rc;
2928 }
2929
2930 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2931                       int datasync)
2932 {
2933         unsigned int xid;
2934         int rc = 0;
2935         struct cifs_tcon *tcon;
2936         struct TCP_Server_Info *server;
2937         struct cifsFileInfo *smbfile = file->private_data;
2938         struct inode *inode = file_inode(file);
2939         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2940
2941         rc = file_write_and_wait_range(file, start, end);
2942         if (rc) {
2943                 trace_cifs_fsync_err(inode->i_ino, rc);
2944                 return rc;
2945         }
2946
2947         xid = get_xid();
2948
2949         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2950                  file, datasync);
2951
2952         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2953                 rc = cifs_zap_mapping(inode);
2954                 if (rc) {
2955                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2956                         rc = 0; /* don't care about it in fsync */
2957                 }
2958         }
2959
2960         tcon = tlink_tcon(smbfile->tlink);
2961         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2962                 server = tcon->ses->server;
2963                 if (server->ops->flush == NULL) {
2964                         rc = -ENOSYS;
2965                         goto strict_fsync_exit;
2966                 }
2967
2968                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2969                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2970                         if (smbfile) {
2971                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2972                                 cifsFileInfo_put(smbfile);
2973                         } else
2974                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2975                 } else
2976                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2977         }
2978
2979 strict_fsync_exit:
2980         free_xid(xid);
2981         return rc;
2982 }
2983
2984 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2985 {
2986         unsigned int xid;
2987         int rc = 0;
2988         struct cifs_tcon *tcon;
2989         struct TCP_Server_Info *server;
2990         struct cifsFileInfo *smbfile = file->private_data;
2991         struct inode *inode = file_inode(file);
2992         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2993
2994         rc = file_write_and_wait_range(file, start, end);
2995         if (rc) {
2996                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2997                 return rc;
2998         }
2999
3000         xid = get_xid();
3001
3002         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3003                  file, datasync);
3004
3005         tcon = tlink_tcon(smbfile->tlink);
3006         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3007                 server = tcon->ses->server;
3008                 if (server->ops->flush == NULL) {
3009                         rc = -ENOSYS;
3010                         goto fsync_exit;
3011                 }
3012
3013                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3014                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3015                         if (smbfile) {
3016                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3017                                 cifsFileInfo_put(smbfile);
3018                         } else
3019                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3020                 } else
3021                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3022         }
3023
3024 fsync_exit:
3025         free_xid(xid);
3026         return rc;
3027 }
3028
3029 /*
3030  * As the file closes, flush all cached write data for this inode, checking
3031  * for write-behind errors.
3032  */
3033 int cifs_flush(struct file *file, fl_owner_t id)
3034 {
3035         struct inode *inode = file_inode(file);
3036         int rc = 0;
3037
3038         if (file->f_mode & FMODE_WRITE)
3039                 rc = filemap_write_and_wait(inode->i_mapping);
3040
3041         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3042         if (rc) {
3043                 /* get more nuanced writeback errors */
3044                 rc = filemap_check_wb_err(file->f_mapping, 0);
3045                 trace_cifs_flush_err(inode->i_ino, rc);
3046         }
3047         return rc;
3048 }
3049
3050 static int
3051 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3052 {
3053         int rc = 0;
3054         unsigned long i;
3055
3056         for (i = 0; i < num_pages; i++) {
3057                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3058                 if (!pages[i]) {
3059                         /*
3060                          * save the number of pages we have already allocated
3061                          * and return with an ENOMEM error
3062                          */
3063                         num_pages = i;
3064                         rc = -ENOMEM;
3065                         break;
3066                 }
3067         }
3068
3069         if (rc) {
3070                 for (i = 0; i < num_pages; i++)
3071                         put_page(pages[i]);
3072         }
3073         return rc;
3074 }
3075
3076 static inline
3077 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3078 {
3079         size_t num_pages;
3080         size_t clen;
3081
3082         clen = min_t(const size_t, len, wsize);
3083         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3084
3085         if (cur_len)
3086                 *cur_len = clen;
3087
3088         return num_pages;
3089 }
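
/*
 * Illustrative worked example (not part of the original file): with 4 KiB
 * pages, get_numpages(65536, 100000, &cur_len) clamps cur_len to the
 * 64 KiB wsize and returns DIV_ROUND_UP(65536, 4096) = 16 pages; the
 * remaining 34464 bytes are picked up by a later iteration.
 */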
3090
3091 static void
3092 cifs_uncached_writedata_release(struct kref *refcount)
3093 {
3094         int i;
3095         struct cifs_writedata *wdata = container_of(refcount,
3096                                         struct cifs_writedata, refcount);
3097
3098         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3099         for (i = 0; i < wdata->nr_pages; i++)
3100                 put_page(wdata->pages[i]);
3101         cifs_writedata_release(refcount);
3102 }
3103
3104 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3105
3106 static void
3107 cifs_uncached_writev_complete(struct work_struct *work)
3108 {
3109         struct cifs_writedata *wdata = container_of(work,
3110                                         struct cifs_writedata, work);
3111         struct inode *inode = d_inode(wdata->cfile->dentry);
3112         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3113
3114         spin_lock(&inode->i_lock);
3115         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3116         if (cifsi->server_eof > inode->i_size)
3117                 i_size_write(inode, cifsi->server_eof);
3118         spin_unlock(&inode->i_lock);
3119
3120         complete(&wdata->done);
3121         collect_uncached_write_data(wdata->ctx);
3122         /* the call below can possibly free the last ref to the aio ctx */
3123         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3124 }
3125
3126 static int
3127 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3128                       size_t *len, unsigned long *num_pages)
3129 {
3130         size_t save_len, copied, bytes, cur_len = *len;
3131         unsigned long i, nr_pages = *num_pages;
3132
3133         save_len = cur_len;
3134         for (i = 0; i < nr_pages; i++) {
3135                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3136                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3137                 cur_len -= copied;
3138                 /*
3139                  * If we didn't copy as much as we expected, then that
3140                  * may mean we trod into an unmapped area. Stop copying
3141                  * at that point. On the next pass through the big
3142                  * loop, we'll likely end up getting a zero-length
3143                  * write and bailing out of it.
3144                  */
3145                 if (copied < bytes)
3146                         break;
3147         }
3148         cur_len = save_len - cur_len;
3149         *len = cur_len;
3150
3151         /*
3152          * If we have no data to send, then that probably means that
3153          * the copy above failed altogether. That's most likely because
3154          * the address in the iovec was bogus. Return -EFAULT and let
3155          * the caller free anything we allocated and bail out.
3156          */
3157         if (!cur_len)
3158                 return -EFAULT;
3159
3160         /*
3161          * i + 1 now represents the number of pages we actually used in
3162          * the copy phase above.
3163          */
3164         *num_pages = i + 1;
3165         return 0;
3166 }
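
/*
 * Illustrative worked example (not part of the original file): filling
 * 3 pages from a 10000-byte iovec that faults after 1000 bytes of the
 * second page copies 4096 + 1000 bytes, so *len becomes 5096 and
 * *num_pages becomes 2; the caller releases the unused third page.
 */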
3167
3168 static int
3169 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3170         struct cifs_aio_ctx *ctx)
3171 {
3172         unsigned int wsize;
3173         struct cifs_credits credits;
3174         int rc;
3175         struct TCP_Server_Info *server = wdata->server;
3176
3177         do {
3178                 if (wdata->cfile->invalidHandle) {
3179                         rc = cifs_reopen_file(wdata->cfile, false);
3180                         if (rc == -EAGAIN)
3181                                 continue;
3182                         else if (rc)
3183                                 break;
3184                 }
3185
3186
3187                 /*
3188                  * Wait for credits to resend this wdata.
3189                  * Note: we attempt to resend the whole wdata rather
3190                  * than in segments
3191                  */
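                /*
                 * Illustration (not part of the original file): if
                 * wdata->bytes is 64 KiB but the server currently grants
                 * only 32 KiB worth of credits, the credits are returned,
                 * we sleep for a second, and we retry until the whole
                 * request fits in a single send.
                 */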
3192                 do {
3193                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3194                                                 &wsize, &credits);
3195                         if (rc)
3196                                 goto fail;
3197
3198                         if (wsize < wdata->bytes) {
3199                                 add_credits_and_wake_if(server, &credits, 0);
3200                                 msleep(1000);
3201                         }
3202                 } while (wsize < wdata->bytes);
3203                 wdata->credits = credits;
3204
3205                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3206
3207                 if (!rc) {
3208                         if (wdata->cfile->invalidHandle)
3209                                 rc = -EAGAIN;
3210                         else {
3211 #ifdef CONFIG_CIFS_SMB_DIRECT
3212                                 if (wdata->mr) {
3213                                         wdata->mr->need_invalidate = true;
3214                                         smbd_deregister_mr(wdata->mr);
3215                                         wdata->mr = NULL;
3216                                 }
3217 #endif
3218                                 rc = server->ops->async_writev(wdata,
3219                                         cifs_uncached_writedata_release);
3220                         }
3221                 }
3222
3223                 /* If the write was successfully sent, we are done */
3224                 if (!rc) {
3225                         list_add_tail(&wdata->list, wdata_list);
3226                         return 0;
3227                 }
3228
3229                 /* Roll back credits and retry if needed */
3230                 add_credits_and_wake_if(server, &wdata->credits, 0);
3231         } while (rc == -EAGAIN);
3232
3233 fail:
3234         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3235         return rc;
3236 }
3237
3238 static int
3239 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3240                      struct cifsFileInfo *open_file,
3241                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3242                      struct cifs_aio_ctx *ctx)
3243 {
3244         int rc = 0;
3245         size_t cur_len;
3246         unsigned long nr_pages, num_pages, i;
3247         struct cifs_writedata *wdata;
3248         struct iov_iter saved_from = *from;
3249         loff_t saved_offset = offset;
3250         pid_t pid;
3251         struct TCP_Server_Info *server;
3252         struct page **pagevec;
3253         size_t start;
3254         unsigned int xid;
3255
3256         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3257                 pid = open_file->pid;
3258         else
3259                 pid = current->tgid;
3260
3261         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3262         xid = get_xid();
3263
3264         do {
3265                 unsigned int wsize;
3266                 struct cifs_credits credits_on_stack;
3267                 struct cifs_credits *credits = &credits_on_stack;
3268
3269                 if (open_file->invalidHandle) {
3270                         rc = cifs_reopen_file(open_file, false);
3271                         if (rc == -EAGAIN)
3272                                 continue;
3273                         else if (rc)
3274                                 break;
3275                 }
3276
3277                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3278                                                    &wsize, credits);
3279                 if (rc)
3280                         break;
3281
3282                 cur_len = min_t(const size_t, len, wsize);
3283
3284                 if (ctx->direct_io) {
3285                         ssize_t result;
3286
3287                         result = iov_iter_get_pages_alloc2(
3288                                 from, &pagevec, cur_len, &start);
3289                         if (result < 0) {
3290                                 cifs_dbg(VFS,
3291                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3292                                          result, iov_iter_type(from),
3293                                          from->iov_offset, from->count);
3294                                 dump_stack();
3295
3296                                 rc = result;
3297                                 add_credits_and_wake_if(server, credits, 0);
3298                                 break;
3299                         }
3300                         cur_len = (size_t)result;
3301
3302                         nr_pages =
3303                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3304
3305                         wdata = cifs_writedata_direct_alloc(pagevec,
3306                                              cifs_uncached_writev_complete);
3307                         if (!wdata) {
3308                                 rc = -ENOMEM;
3309                                 for (i = 0; i < nr_pages; i++)
3310                                         put_page(pagevec[i]);
3311                                 kvfree(pagevec);
3312                                 add_credits_and_wake_if(server, credits, 0);
3313                                 break;
3314                         }
3315
3316
3317                         wdata->page_offset = start;
3318                         wdata->tailsz =
3319                                 nr_pages > 1 ?
3320                                         cur_len - (PAGE_SIZE - start) -
3321                                         (nr_pages - 2) * PAGE_SIZE :
3322                                         cur_len;
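                        /*
                         * Illustrative worked example (not part of the
                         * original file): with 4 KiB pages, start = 512
                         * and cur_len = 10000 give nr_pages = 3, so the
                         * tail page holds 10000 - 3584 - 4096 = 2320 bytes.
                         */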
3323                 } else {
3324                         nr_pages = get_numpages(wsize, len, &cur_len);
3325                         wdata = cifs_writedata_alloc(nr_pages,
3326                                              cifs_uncached_writev_complete);
3327                         if (!wdata) {
3328                                 rc = -ENOMEM;
3329                                 add_credits_and_wake_if(server, credits, 0);
3330                                 break;
3331                         }
3332
3333                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3334                         if (rc) {
3335                                 kvfree(wdata->pages);
3336                                 kfree(wdata);
3337                                 add_credits_and_wake_if(server, credits, 0);
3338                                 break;
3339                         }
3340
3341                         num_pages = nr_pages;
3342                         rc = wdata_fill_from_iovec(
3343                                 wdata, from, &cur_len, &num_pages);
3344                         if (rc) {
3345                                 for (i = 0; i < nr_pages; i++)
3346                                         put_page(wdata->pages[i]);
3347                                 kvfree(wdata->pages);
3348                                 kfree(wdata);
3349                                 add_credits_and_wake_if(server, credits, 0);
3350                                 break;
3351                         }
3352
3353                         /*
3354                          * Bring nr_pages down to the number of pages we
3355                          * actually used, and free any pages that we didn't use.
3356                          */
3357                         for ( ; nr_pages > num_pages; nr_pages--)
3358                                 put_page(wdata->pages[nr_pages - 1]);
3359
3360                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3361                 }
3362
3363                 wdata->sync_mode = WB_SYNC_ALL;
3364                 wdata->nr_pages = nr_pages;
3365                 wdata->offset = (__u64)offset;
3366                 wdata->cfile = cifsFileInfo_get(open_file);
3367                 wdata->server = server;
3368                 wdata->pid = pid;
3369                 wdata->bytes = cur_len;
3370                 wdata->pagesz = PAGE_SIZE;
3371                 wdata->credits = credits_on_stack;
3372                 wdata->ctx = ctx;
3373                 kref_get(&ctx->refcount);
3374
3375                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3376
3377                 if (!rc) {
3378                         if (wdata->cfile->invalidHandle)
3379                                 rc = -EAGAIN;
3380                         else
3381                                 rc = server->ops->async_writev(wdata,
3382                                         cifs_uncached_writedata_release);
3383                 }
3384
3385                 if (rc) {
3386                         add_credits_and_wake_if(server, &wdata->credits, 0);
3387                         kref_put(&wdata->refcount,
3388                                  cifs_uncached_writedata_release);
3389                         if (rc == -EAGAIN) {
3390                                 *from = saved_from;
3391                                 iov_iter_advance(from, offset - saved_offset);
3392                                 continue;
3393                         }
3394                         break;
3395                 }
3396
3397                 list_add_tail(&wdata->list, wdata_list);
3398                 offset += cur_len;
3399                 len -= cur_len;
3400         } while (len > 0);
3401
3402         free_xid(xid);
3403         return rc;
3404 }
3405
3406 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3407 {
3408         struct cifs_writedata *wdata, *tmp;
3409         struct cifs_tcon *tcon;
3410         struct cifs_sb_info *cifs_sb;
3411         struct dentry *dentry = ctx->cfile->dentry;
3412         ssize_t rc;
3413
3414         tcon = tlink_tcon(ctx->cfile->tlink);
3415         cifs_sb = CIFS_SB(dentry->d_sb);
3416
3417         mutex_lock(&ctx->aio_mutex);
3418
3419         if (list_empty(&ctx->list)) {
3420                 mutex_unlock(&ctx->aio_mutex);
3421                 return;
3422         }
3423
3424         rc = ctx->rc;
3425         /*
3426          * Wait for and collect replies for any successful sends in order of
3427          * increasing offset. Once an error is hit, then return without waiting
3428          * for any more replies.
3429          */
3430 restart_loop:
3431         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3432                 if (!rc) {
3433                         if (!try_wait_for_completion(&wdata->done)) {
3434                                 mutex_unlock(&ctx->aio_mutex);
3435                                 return;
3436                         }
3437
3438                         if (wdata->result)
3439                                 rc = wdata->result;
3440                         else
3441                                 ctx->total_len += wdata->bytes;
3442
3443                         /* resend call if it's a retryable error */
3444                         if (rc == -EAGAIN) {
3445                                 struct list_head tmp_list;
3446                                 struct iov_iter tmp_from = ctx->iter;
3447
3448                                 INIT_LIST_HEAD(&tmp_list);
3449                                 list_del_init(&wdata->list);
3450
3451                                 if (ctx->direct_io)
3452                                         rc = cifs_resend_wdata(
3453                                                 wdata, &tmp_list, ctx);
3454                                 else {
3455                                         iov_iter_advance(&tmp_from,
3456                                                  wdata->offset - ctx->pos);
3457
3458                                         rc = cifs_write_from_iter(wdata->offset,
3459                                                 wdata->bytes, &tmp_from,
3460                                                 ctx->cfile, cifs_sb, &tmp_list,
3461                                                 ctx);
3462
3463                                         kref_put(&wdata->refcount,
3464                                                 cifs_uncached_writedata_release);
3465                                 }
3466
3467                                 list_splice(&tmp_list, &ctx->list);
3468                                 goto restart_loop;
3469                         }
3470                 }
3471                 list_del_init(&wdata->list);
3472                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3473         }
3474
3475         cifs_stats_bytes_written(tcon, ctx->total_len);
3476         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3477
3478         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3479
3480         mutex_unlock(&ctx->aio_mutex);
3481
3482         if (ctx->iocb && ctx->iocb->ki_complete)
3483                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3484         else
3485                 complete(&ctx->done);
3486 }
3487
3488 static ssize_t __cifs_writev(
3489         struct kiocb *iocb, struct iov_iter *from, bool direct)
3490 {
3491         struct file *file = iocb->ki_filp;
3492         ssize_t total_written = 0;
3493         struct cifsFileInfo *cfile;
3494         struct cifs_tcon *tcon;
3495         struct cifs_sb_info *cifs_sb;
3496         struct cifs_aio_ctx *ctx;
3497         struct iov_iter saved_from = *from;
3498         size_t len = iov_iter_count(from);
3499         int rc;
3500
3501         /*
3502          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3503          * In this case, fall back to the non-direct write function.
3504          * This could be improved by getting pages directly from ITER_KVEC.
3505          */
3506         if (direct && iov_iter_is_kvec(from)) {
3507                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3508                 direct = false;
3509         }
3510
3511         rc = generic_write_checks(iocb, from);
3512         if (rc <= 0)
3513                 return rc;
3514
3515         cifs_sb = CIFS_FILE_SB(file);
3516         cfile = file->private_data;
3517         tcon = tlink_tcon(cfile->tlink);
3518
3519         if (!tcon->ses->server->ops->async_writev)
3520                 return -ENOSYS;
3521
3522         ctx = cifs_aio_ctx_alloc();
3523         if (!ctx)
3524                 return -ENOMEM;
3525
3526         ctx->cfile = cifsFileInfo_get(cfile);
3527
3528         if (!is_sync_kiocb(iocb))
3529                 ctx->iocb = iocb;
3530
3531         ctx->pos = iocb->ki_pos;
3532
3533         if (direct) {
3534                 ctx->direct_io = true;
3535                 ctx->iter = *from;
3536                 ctx->len = len;
3537         } else {
3538                 rc = setup_aio_ctx_iter(ctx, from, ITER_SOURCE);
3539                 if (rc) {
3540                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3541                         return rc;
3542                 }
3543         }
3544
3545         /* grab a lock here because response handlers can access ctx */
3546         mutex_lock(&ctx->aio_mutex);
3547
3548         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3549                                   cfile, cifs_sb, &ctx->list, ctx);
3550
3551         /*
3552          * If at least one write was successfully sent, then discard any rc
3553          * value from the later writes. If the remaining writes succeed,
3554          * we'll end up returning whatever was written. If they fail, we'll
3555          * get a new rc value from that.
3556          */
3557         if (!list_empty(&ctx->list))
3558                 rc = 0;
3559
3560         mutex_unlock(&ctx->aio_mutex);
3561
3562         if (rc) {
3563                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3564                 return rc;
3565         }
3566
3567         if (!is_sync_kiocb(iocb)) {
3568                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3569                 return -EIOCBQUEUED;
3570         }
3571
3572         rc = wait_for_completion_killable(&ctx->done);
3573         if (rc) {
3574                 mutex_lock(&ctx->aio_mutex);
3575                 ctx->rc = rc = -EINTR;
3576                 total_written = ctx->total_len;
3577                 mutex_unlock(&ctx->aio_mutex);
3578         } else {
3579                 rc = ctx->rc;
3580                 total_written = ctx->total_len;
3581         }
3582
3583         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3584
3585         if (unlikely(!total_written))
3586                 return rc;
3587
3588         iocb->ki_pos += total_written;
3589         return total_written;
3590 }
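
/*
 * Illustrative summary (not part of the original file) of how the two
 * entry points below behave, given the __cifs_writev() logic above:
 * for a sync kiocb the call blocks on ctx->done and returns the number
 * of bytes written; for an async kiocb it returns -EIOCBQUEUED and the
 * completion path invokes iocb->ki_complete() with the final result.
 */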
3591
3592 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3593 {
3594         struct file *file = iocb->ki_filp;
3595
3596         cifs_revalidate_mapping(file->f_inode);
3597         return __cifs_writev(iocb, from, true);
3598 }
3599
3600 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3601 {
3602         return __cifs_writev(iocb, from, false);
3603 }
3604
3605 static ssize_t
3606 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3607 {
3608         struct file *file = iocb->ki_filp;
3609         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3610         struct inode *inode = file->f_mapping->host;
3611         struct cifsInodeInfo *cinode = CIFS_I(inode);
3612         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3613         ssize_t rc;
3614
3615         inode_lock(inode);
3616         /*
3617          * We need to hold the sem to be sure nobody modifies the lock list
3618          * with a brlock that prevents writing.
3619          */
3620         down_read(&cinode->lock_sem);
3621
3622         rc = generic_write_checks(iocb, from);
3623         if (rc <= 0)
3624                 goto out;
3625
3626         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3627                                      server->vals->exclusive_lock_type, 0,
3628                                      NULL, CIFS_WRITE_OP))
3629                 rc = __generic_file_write_iter(iocb, from);
3630         else
3631                 rc = -EACCES;
3632 out:
3633         up_read(&cinode->lock_sem);
3634         inode_unlock(inode);
3635
3636         if (rc > 0)
3637                 rc = generic_write_sync(iocb, rc);
3638         return rc;
3639 }
3640
3641 ssize_t
3642 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3643 {
3644         struct inode *inode = file_inode(iocb->ki_filp);
3645         struct cifsInodeInfo *cinode = CIFS_I(inode);
3646         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3647         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3648                                                 iocb->ki_filp->private_data;
3649         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3650         ssize_t written;
3651
3652         written = cifs_get_writer(cinode);
3653         if (written)
3654                 return written;
3655
3656         if (CIFS_CACHE_WRITE(cinode)) {
3657                 if (cap_unix(tcon->ses) &&
3658                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3659                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3660                         written = generic_file_write_iter(iocb, from);
3661                         goto out;
3662                 }
3663                 written = cifs_writev(iocb, from);
3664                 goto out;
3665         }
3666         /*
3667          * For non-oplocked files in strict cache mode we need to write the data
3668          * to the server exactly from the pos to pos+len-1 rather than flush all
3669          * affected pages because it may cause an error with mandatory locks on
3670          * these pages but not on the region from pos to pos+len-1.
3671          */
3672         written = cifs_user_writev(iocb, from);
3673         if (CIFS_CACHE_READ(cinode)) {
3674                 /*
3675                  * We have read level caching and we have just sent a write
3676                  * request to the server thus making data in the cache stale.
3677                  * Zap the cache and set oplock/lease level to NONE to avoid
3678                  * reading stale data from the cache. All subsequent read
3679                  * operations will read new data from the server.
3680                  */
3681                 cifs_zap_mapping(inode);
3682                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3683                          inode);
3684                 cinode->oplock = 0;
3685         }
3686 out:
3687         cifs_put_writer(cinode);
3688         return written;
3689 }
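
/*
 * Condensed decision tree (not part of the original file) for the
 * strict-cache write path above:
 *
 *      CIFS_CACHE_WRITE and the server handles POSIX byte-range locks
 *              -> generic_file_write_iter()
 *      CIFS_CACHE_WRITE otherwise
 *              -> cifs_writev() (checks local mandatory brlocks first)
 *      no write caching
 *              -> cifs_user_writev(), then zap stale pages if read-cached
 */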
3690
3691 static struct cifs_readdata *
3692 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3693 {
3694         struct cifs_readdata *rdata;
3695
3696         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3697         if (rdata != NULL) {
3698                 rdata->pages = pages;
3699                 kref_init(&rdata->refcount);
3700                 INIT_LIST_HEAD(&rdata->list);
3701                 init_completion(&rdata->done);
3702                 INIT_WORK(&rdata->work, complete);
3703         }
3704
3705         return rdata;
3706 }
3707
3708 static struct cifs_readdata *
3709 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3710 {
3711         struct page **pages =
3712                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3713         struct cifs_readdata *ret = NULL;
3714
3715         if (pages) {
3716                 ret = cifs_readdata_direct_alloc(pages, complete);
3717                 if (!ret)
3718                         kfree(pages);
3719         }
3720
3721         return ret;
3722 }
3723
3724 void
3725 cifs_readdata_release(struct kref *refcount)
3726 {
3727         struct cifs_readdata *rdata = container_of(refcount,
3728                                         struct cifs_readdata, refcount);
3729 #ifdef CONFIG_CIFS_SMB_DIRECT
3730         if (rdata->mr) {
3731                 smbd_deregister_mr(rdata->mr);
3732                 rdata->mr = NULL;
3733         }
3734 #endif
3735         if (rdata->cfile)
3736                 cifsFileInfo_put(rdata->cfile);
3737
3738         kvfree(rdata->pages);
3739         kfree(rdata);
3740 }
3741
3742 static int
3743 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3744 {
3745         int rc = 0;
3746         struct page *page;
3747         unsigned int i;
3748
3749         for (i = 0; i < nr_pages; i++) {
3750                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3751                 if (!page) {
3752                         rc = -ENOMEM;
3753                         break;
3754                 }
3755                 rdata->pages[i] = page;
3756         }
3757
3758         if (rc) {
3759                 unsigned int nr_page_failed = i;
3760
3761                 for (i = 0; i < nr_page_failed; i++) {
3762                         put_page(rdata->pages[i]);
3763                         rdata->pages[i] = NULL;
3764                 }
3765         }
3766         return rc;
3767 }
3768
3769 static void
3770 cifs_uncached_readdata_release(struct kref *refcount)
3771 {
3772         struct cifs_readdata *rdata = container_of(refcount,
3773                                         struct cifs_readdata, refcount);
3774         unsigned int i;
3775
3776         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3777         for (i = 0; i < rdata->nr_pages; i++) {
3778                 put_page(rdata->pages[i]);
3779         }
3780         cifs_readdata_release(refcount);
3781 }
3782
3783 /**
3784  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3785  * @rdata:      the readdata response with list of pages holding data
3786  * @iter:       destination for our data
3787  *
3788  * This function copies data from a list of pages in a readdata response into
3789  * an array of iovecs. It will first calculate where the data should go
3790  * based on the info in the readdata and then copy the data into that spot.
3791  */
3792 static int
3793 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3794 {
3795         size_t remaining = rdata->got_bytes;
3796         unsigned int i;
3797
3798         for (i = 0; i < rdata->nr_pages; i++) {
3799                 struct page *page = rdata->pages[i];
3800                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3801                 size_t written;
3802
3803                 if (unlikely(iov_iter_is_pipe(iter))) {
3804                         void *addr = kmap_atomic(page);
3805
3806                         written = copy_to_iter(addr, copy, iter);
3807                         kunmap_atomic(addr);
3808                 } else
3809                         written = copy_page_to_iter(page, 0, copy, iter);
3810                 remaining -= written;
3811                 if (written < copy && iov_iter_count(iter) > 0)
3812                         break;
3813         }
3814         return remaining ? -EFAULT : 0;
3815 }
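
/*
 * Illustrative worked example (not part of the original file): with
 * rdata->got_bytes = 6000 and 4 KiB pages, the loop copies 4096 bytes
 * from page 0 and 1904 bytes from page 1 and returns 0; if the
 * destination iterator cannot absorb all of got_bytes, -EFAULT is
 * returned instead.
 */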
3816
3817 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3818
3819 static void
3820 cifs_uncached_readv_complete(struct work_struct *work)
3821 {
3822         struct cifs_readdata *rdata = container_of(work,
3823                                                 struct cifs_readdata, work);
3824
3825         complete(&rdata->done);
3826         collect_uncached_read_data(rdata->ctx);
3827         /* the call below can possibly free the last ref to the aio ctx */
3828         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3829 }
3830
3831 static int
3832 uncached_fill_pages(struct TCP_Server_Info *server,
3833                     struct cifs_readdata *rdata, struct iov_iter *iter,
3834                     unsigned int len)
3835 {
3836         int result = 0;
3837         unsigned int i;
3838         unsigned int nr_pages = rdata->nr_pages;
3839         unsigned int page_offset = rdata->page_offset;
3840
3841         rdata->got_bytes = 0;
3842         rdata->tailsz = PAGE_SIZE;
3843         for (i = 0; i < nr_pages; i++) {
3844                 struct page *page = rdata->pages[i];
3845                 size_t n;
3846                 unsigned int segment_size = rdata->pagesz;
3847
3848                 if (i == 0)
3849                         segment_size -= page_offset;
3850                 else
3851                         page_offset = 0;
3852
3853
3854                 if (len <= 0) {
3855                         /* no need to hold page hostage */
3856                         rdata->pages[i] = NULL;
3857                         rdata->nr_pages--;
3858                         put_page(page);
3859                         continue;
3860                 }
3861
3862                 n = len;
3863                 if (len >= segment_size)
3864                         /* enough data to fill the page */
3865                         n = segment_size;
3866                 else
3867                         rdata->tailsz = len;
3868                 len -= n;
3869
3870                 if (iter)
3871                         result = copy_page_from_iter(
3872                                         page, page_offset, n, iter);
3873 #ifdef CONFIG_CIFS_SMB_DIRECT
3874                 else if (rdata->mr)
3875                         result = n;
3876 #endif
3877                 else
3878                         result = cifs_read_page_from_socket(
3879                                         server, page, page_offset, n);
3880                 if (result < 0)
3881                         break;
3882
3883                 rdata->got_bytes += result;
3884         }
3885
3886         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
3887                                                 rdata->got_bytes : result;
3888 }
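
/*
 * Illustrative worked example (not part of the original file): for
 * len = 6000 with 4 KiB pages and rdata->page_offset = 512, page 0
 * receives the 3584 bytes left in its segment, page 1 receives the
 * final 2416 bytes (recorded in rdata->tailsz), and any remaining
 * pages are released once len is consumed.
 */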
3889
3890 static int
3891 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3892                               struct cifs_readdata *rdata, unsigned int len)
3893 {
3894         return uncached_fill_pages(server, rdata, NULL, len);
3895 }
3896
3897 static int
3898 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3899                               struct cifs_readdata *rdata,
3900                               struct iov_iter *iter)
3901 {
3902         return uncached_fill_pages(server, rdata, iter, iter->count);
3903 }
3904
3905 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3906                         struct list_head *rdata_list,
3907                         struct cifs_aio_ctx *ctx)
3908 {
3909         unsigned int rsize;
3910         struct cifs_credits credits;
3911         int rc;
3912         struct TCP_Server_Info *server;
3913
3914         /* XXX: should we pick a new channel here? */
3915         server = rdata->server;
3916
3917         do {
3918                 if (rdata->cfile->invalidHandle) {
3919                         rc = cifs_reopen_file(rdata->cfile, true);
3920                         if (rc == -EAGAIN)
3921                                 continue;
3922                         else if (rc)
3923                                 break;
3924                 }
3925
3926                 /*
3927                  * Wait for credits to resend this rdata.
3928                  * Note: we attempt to resend the whole rdata rather
3929                  * than in segments
3930                  */
3931                 do {
3932                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3933                                                 &rsize, &credits);
3934
3935                         if (rc)
3936                                 goto fail;
3937
3938                         if (rsize < rdata->bytes) {
3939                                 add_credits_and_wake_if(server, &credits, 0);
3940                                 msleep(1000);
3941                         }
3942                 } while (rsize < rdata->bytes);
3943                 rdata->credits = credits;
3944
3945                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3946                 if (!rc) {
3947                         if (rdata->cfile->invalidHandle)
3948                                 rc = -EAGAIN;
3949                         else {
3950 #ifdef CONFIG_CIFS_SMB_DIRECT
3951                                 if (rdata->mr) {
3952                                         rdata->mr->need_invalidate = true;
3953                                         smbd_deregister_mr(rdata->mr);
3954                                         rdata->mr = NULL;
3955                                 }
3956 #endif
3957                                 rc = server->ops->async_readv(rdata);
3958                         }
3959                 }
3960
3961                 /* If the read was successfully sent, we are done */
3962                 if (!rc) {
3963                         /* Add to aio pending list */
3964                         list_add_tail(&rdata->list, rdata_list);
3965                         return 0;
3966                 }
3967
3968                 /* Roll back credits and retry if needed */
3969                 add_credits_and_wake_if(server, &rdata->credits, 0);
3970         } while (rc == -EAGAIN);
3971
3972 fail:
3973         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3974         return rc;
3975 }
3976
3977 static int
3978 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3979                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3980                      struct cifs_aio_ctx *ctx)
3981 {
3982         struct cifs_readdata *rdata;
3983         unsigned int npages, rsize;
3984         struct cifs_credits credits_on_stack;
3985         struct cifs_credits *credits = &credits_on_stack;
3986         size_t cur_len;
3987         int rc;
3988         pid_t pid;
3989         struct TCP_Server_Info *server;
3990         struct page **pagevec;
3991         size_t start;
3992         struct iov_iter direct_iov = ctx->iter;
3993
3994         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3995
3996         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3997                 pid = open_file->pid;
3998         else
3999                 pid = current->tgid;
4000
4001         if (ctx->direct_io)
4002                 iov_iter_advance(&direct_iov, offset - ctx->pos);
4003
4004         do {
4005                 if (open_file->invalidHandle) {
4006                         rc = cifs_reopen_file(open_file, true);
4007                         if (rc == -EAGAIN)
4008                                 continue;
4009                         else if (rc)
4010                                 break;
4011                 }
4012
4013                 if (cifs_sb->ctx->rsize == 0)
4014                         cifs_sb->ctx->rsize =
4015                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4016                                                              cifs_sb->ctx);
4017
4018                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4019                                                    &rsize, credits);
4020                 if (rc)
4021                         break;
4022
4023                 cur_len = min_t(const size_t, len, rsize);
4024
4025                 if (ctx->direct_io) {
4026                         ssize_t result;
4027
4028                         result = iov_iter_get_pages_alloc2(
4029                                         &direct_iov, &pagevec,
4030                                         cur_len, &start);
4031                         if (result < 0) {
4032                                 cifs_dbg(VFS,
4033                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4034                                          result, iov_iter_type(&direct_iov),
4035                                          direct_iov.iov_offset,
4036                                          direct_iov.count);
4037                                 dump_stack();
4038
4039                                 rc = result;
4040                                 add_credits_and_wake_if(server, credits, 0);
4041                                 break;
4042                         }
4043                         cur_len = (size_t)result;
4044
4045                         rdata = cifs_readdata_direct_alloc(
4046                                         pagevec, cifs_uncached_readv_complete);
4047                         if (!rdata) {
4048                                 add_credits_and_wake_if(server, credits, 0);
4049                                 rc = -ENOMEM;
4050                                 break;
4051                         }
4052
4053                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4054                         rdata->page_offset = start;
4055                         rdata->tailsz = npages > 1 ?
4056                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4057                                 cur_len;
4058
4059                 } else {
4060
4061                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4062                         /* allocate a readdata struct */
4063                         rdata = cifs_readdata_alloc(npages,
4064                                             cifs_uncached_readv_complete);
4065                         if (!rdata) {
4066                                 add_credits_and_wake_if(server, credits, 0);
4067                                 rc = -ENOMEM;
4068                                 break;
4069                         }
4070
4071                         rc = cifs_read_allocate_pages(rdata, npages);
4072                         if (rc) {
4073                                 kvfree(rdata->pages);
4074                                 kfree(rdata);
4075                                 add_credits_and_wake_if(server, credits, 0);
4076                                 break;
4077                         }
4078
4079                         rdata->tailsz = PAGE_SIZE;
4080                 }
4081
4082                 rdata->server = server;
4083                 rdata->cfile = cifsFileInfo_get(open_file);
4084                 rdata->nr_pages = npages;
4085                 rdata->offset = offset;
4086                 rdata->bytes = cur_len;
4087                 rdata->pid = pid;
4088                 rdata->pagesz = PAGE_SIZE;
4089                 rdata->read_into_pages = cifs_uncached_read_into_pages;
4090                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4091                 rdata->credits = credits_on_stack;
4092                 rdata->ctx = ctx;
4093                 kref_get(&ctx->refcount);
4094
4095                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4096
4097                 if (!rc) {
4098                         if (rdata->cfile->invalidHandle)
4099                                 rc = -EAGAIN;
4100                         else
4101                                 rc = server->ops->async_readv(rdata);
4102                 }
4103
4104                 if (rc) {
4105                         add_credits_and_wake_if(server, &rdata->credits, 0);
4106                         kref_put(&rdata->refcount,
4107                                 cifs_uncached_readdata_release);
4108                         if (rc == -EAGAIN) {
4109                                 iov_iter_revert(&direct_iov, cur_len);
4110                                 continue;
4111                         }
4112                         break;
4113                 }
4114
4115                 list_add_tail(&rdata->list, rdata_list);
4116                 offset += cur_len;
4117                 len -= cur_len;
4118         } while (len > 0);
4119
4120         return rc;
4121 }
4122
4123 static void
4124 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4125 {
4126         struct cifs_readdata *rdata, *tmp;
4127         struct iov_iter *to = &ctx->iter;
4128         struct cifs_sb_info *cifs_sb;
4129         int rc;
4130
4131         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4132
4133         mutex_lock(&ctx->aio_mutex);
4134
4135         if (list_empty(&ctx->list)) {
4136                 mutex_unlock(&ctx->aio_mutex);
4137                 return;
4138         }
4139
4140         rc = ctx->rc;
4141         /* the loop below should proceed in the order of increasing offsets */
4142 again:
4143         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4144                 if (!rc) {
4145                         if (!try_wait_for_completion(&rdata->done)) {
4146                                 mutex_unlock(&ctx->aio_mutex);
4147                                 return;
4148                         }
4149
4150                         if (rdata->result == -EAGAIN) {
4151                                 /* resend call if it's a retryable error */
4152                                 struct list_head tmp_list;
4153                                 unsigned int got_bytes = rdata->got_bytes;
4154
4155                                 list_del_init(&rdata->list);
4156                                 INIT_LIST_HEAD(&tmp_list);
4157
4158                                 /*
4159                                  * We got part of the data and then a
4160                                  * reconnect happened -- fill the buffer
4161                                  * and continue reading.
4162                                  */
4163                                 if (got_bytes && got_bytes < rdata->bytes) {
4164                                         rc = 0;
4165                                         if (!ctx->direct_io)
4166                                                 rc = cifs_readdata_to_iov(rdata, to);
4167                                         if (rc) {
4168                                                 kref_put(&rdata->refcount,
4169                                                         cifs_uncached_readdata_release);
4170                                                 continue;
4171                                         }
4172                                 }
4173
4174                                 if (ctx->direct_io) {
4175                                         /*
4176                                          * Re-use rdata as this is a
4177                                          * direct I/O
4178                                          */
4179                                         rc = cifs_resend_rdata(
4180                                                 rdata,
4181                                                 &tmp_list, ctx);
4182                                 } else {
4183                                         rc = cifs_send_async_read(
4184                                                 rdata->offset + got_bytes,
4185                                                 rdata->bytes - got_bytes,
4186                                                 rdata->cfile, cifs_sb,
4187                                                 &tmp_list, ctx);
4188
4189                                         kref_put(&rdata->refcount,
4190                                                 cifs_uncached_readdata_release);
4191                                 }
4192
4193                                 list_splice(&tmp_list, &ctx->list);
4194
4195                                 goto again;
4196                         } else if (rdata->result)
4197                                 rc = rdata->result;
4198                         else if (!ctx->direct_io)
4199                                 rc = cifs_readdata_to_iov(rdata, to);
4200
4201                         /* if there was a short read -- discard anything left */
4202                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4203                                 rc = -ENODATA;
4204
4205                         ctx->total_len += rdata->got_bytes;
4206                 }
4207                 list_del_init(&rdata->list);
4208                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4209         }
4210
4211         if (!ctx->direct_io)
4212                 ctx->total_len = ctx->len - iov_iter_count(to);
4213
4214         /* mask nodata case */
4215         if (rc == -ENODATA)
4216                 rc = 0;
4217
4218         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4219
4220         mutex_unlock(&ctx->aio_mutex);
4221
4222         if (ctx->iocb && ctx->iocb->ki_complete)
4223                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4224         else
4225                 complete(&ctx->done);
4226 }
4227
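/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv():
 * build a cifs_aio_ctx, split the request into rsize-sized async reads via
 * cifs_send_async_read(), then either return -EIOCBQUEUED for an async
 * kiocb or wait for collect_uncached_read_data() to finish the I/O.
 */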
4228 static ssize_t __cifs_readv(
4229         struct kiocb *iocb, struct iov_iter *to, bool direct)
4230 {
4231         size_t len;
4232         struct file *file = iocb->ki_filp;
4233         struct cifs_sb_info *cifs_sb;
4234         struct cifsFileInfo *cfile;
4235         struct cifs_tcon *tcon;
4236         ssize_t rc, total_read = 0;
4237         loff_t offset = iocb->ki_pos;
4238         struct cifs_aio_ctx *ctx;
4239
4240         /*
4241          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
4242          * so fall back to the data copy read path. This could be
4243          * improved by getting pages directly in ITER_KVEC.
4244          */
4245         if (direct && iov_iter_is_kvec(to)) {
4246                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4247                 direct = false;
4248         }
4249
4250         len = iov_iter_count(to);
4251         if (!len)
4252                 return 0;
4253
4254         cifs_sb = CIFS_FILE_SB(file);
4255         cfile = file->private_data;
4256         tcon = tlink_tcon(cfile->tlink);
4257
4258         if (!tcon->ses->server->ops->async_readv)
4259                 return -ENOSYS;
4260
4261         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4262                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4263
4264         ctx = cifs_aio_ctx_alloc();
4265         if (!ctx)
4266                 return -ENOMEM;
4267
4268         ctx->cfile = cifsFileInfo_get(cfile);
4269
4270         if (!is_sync_kiocb(iocb))
4271                 ctx->iocb = iocb;
4272
4273         if (user_backed_iter(to))
4274                 ctx->should_dirty = true;
4275
4276         if (direct) {
4277                 ctx->pos = offset;
4278                 ctx->direct_io = true;
4279                 ctx->iter = *to;
4280                 ctx->len = len;
4281         } else {
4282                 rc = setup_aio_ctx_iter(ctx, to, ITER_DEST);
4283                 if (rc) {
4284                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4285                         return rc;
4286                 }
4287                 len = ctx->len;
4288         }
4289
4290         if (direct) {
4291                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4292                                                   offset, offset + len - 1);
4293                 if (rc) {
4294                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4295                         return -EAGAIN;
4296                 }
4297         }
4298
4299         /* grab a lock here because read response handlers can access ctx */
4300         mutex_lock(&ctx->aio_mutex);
4301
4302         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4303
4304         /* if at least one read request was sent successfully, reset rc */
4305         if (!list_empty(&ctx->list))
4306                 rc = 0;
4307
4308         mutex_unlock(&ctx->aio_mutex);
4309
4310         if (rc) {
4311                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4312                 return rc;
4313         }
4314
4315         if (!is_sync_kiocb(iocb)) {
4316                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4317                 return -EIOCBQUEUED;
4318         }
4319
4320         rc = wait_for_completion_killable(&ctx->done);
4321         if (rc) {
4322                 mutex_lock(&ctx->aio_mutex);
4323                 ctx->rc = rc = -EINTR;
4324                 total_read = ctx->total_len;
4325                 mutex_unlock(&ctx->aio_mutex);
4326         } else {
4327                 rc = ctx->rc;
4328                 total_read = ctx->total_len;
4329         }
4330
4331         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4332
4333         if (total_read) {
4334                 iocb->ki_pos += total_read;
4335                 return total_read;
4336         }
4337         return rc;
4338 }
4339
4340 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4341 {
4342         return __cifs_readv(iocb, to, true);
4343 }
4344
4345 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4346 {
4347         return __cifs_readv(iocb, to, false);
4348 }
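/*
 * Both wrappers above funnel into __cifs_readv(); they differ only in
 * whether the transfer bypasses the page cache as direct I/O.
 */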
4349
4350 ssize_t
4351 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4352 {
4353         struct inode *inode = file_inode(iocb->ki_filp);
4354         struct cifsInodeInfo *cinode = CIFS_I(inode);
4355         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4356         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4357                                                 iocb->ki_filp->private_data;
4358         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4359         int rc = -EACCES;
4360
4361         /*
4362          * In strict cache mode we need to read from the server every time
4363          * if we don't have a level II oplock, because the server can delay
4364          * the mtime change - so we can't make a decision about invalidating
4365          * the inode. Page reading can also fail if there are mandatory
4366          * locks on pages affected by this read but not on the region from
4367          * pos to pos+len-1.
4368          */
4369         if (!CIFS_CACHE_READ(cinode))
4370                 return cifs_user_readv(iocb, to);
4371
4372         if (cap_unix(tcon->ses) &&
4373             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4374             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4375                 return generic_file_read_iter(iocb, to);
4376
4377         /*
4378          * We need to hold the sem to be sure nobody modifies lock list
4379          * with a brlock that prevents reading.
4380          */
4381         down_read(&cinode->lock_sem);
4382         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4383                                      tcon->ses->server->vals->shared_lock_type,
4384                                      0, NULL, CIFS_READ_OP))
4385                 rc = generic_file_read_iter(iocb, to);
4386         up_read(&cinode->lock_sem);
4387         return rc;
4388 }
4389
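/*
 * Synchronous read helper used by cifs_readpage_worker(): issues
 * rsize-sized sync_read calls, reopening an invalidated handle and
 * retrying on -EAGAIN, until read_size bytes have been read or an
 * error or EOF is hit.
 */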
4390 static ssize_t
4391 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4392 {
4393         int rc = -EACCES;
4394         unsigned int bytes_read = 0;
4395         unsigned int total_read;
4396         unsigned int current_read_size;
4397         unsigned int rsize;
4398         struct cifs_sb_info *cifs_sb;
4399         struct cifs_tcon *tcon;
4400         struct TCP_Server_Info *server;
4401         unsigned int xid;
4402         char *cur_offset;
4403         struct cifsFileInfo *open_file;
4404         struct cifs_io_parms io_parms = {0};
4405         int buf_type = CIFS_NO_BUFFER;
4406         __u32 pid;
4407
4408         xid = get_xid();
4409         cifs_sb = CIFS_FILE_SB(file);
4410
4411         /* FIXME: set up handlers for larger reads and/or convert to async */
4412         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4413
4414         if (file->private_data == NULL) {
4415                 rc = -EBADF;
4416                 free_xid(xid);
4417                 return rc;
4418         }
4419         open_file = file->private_data;
4420         tcon = tlink_tcon(open_file->tlink);
4421         server = cifs_pick_channel(tcon->ses);
4422
4423         if (!server->ops->sync_read) {
4424                 free_xid(xid);
4425                 return -ENOSYS;
4426         }
4427
4428         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4429                 pid = open_file->pid;
4430         else
4431                 pid = current->tgid;
4432
4433         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4434                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4435
4436         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4437              total_read += bytes_read, cur_offset += bytes_read) {
4438                 do {
4439                         current_read_size = min_t(uint, read_size - total_read,
4440                                                   rsize);
4441                         /*
4442                          * For Windows ME and 9x we do not want to request
4443                          * more than was negotiated, since the server will
4444                          * then refuse the read.
4445                          */
4446                         if (!(tcon->ses->capabilities &
4447                                 tcon->ses->server->vals->cap_large_files)) {
4448                                 current_read_size = min_t(uint,
4449                                         current_read_size, CIFSMaxBufSize);
4450                         }
4451                         if (open_file->invalidHandle) {
4452                                 rc = cifs_reopen_file(open_file, true);
4453                                 if (rc != 0)
4454                                         break;
4455                         }
4456                         io_parms.pid = pid;
4457                         io_parms.tcon = tcon;
4458                         io_parms.offset = *offset;
4459                         io_parms.length = current_read_size;
4460                         io_parms.server = server;
4461                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4462                                                     &bytes_read, &cur_offset,
4463                                                     &buf_type);
4464                 } while (rc == -EAGAIN);
4465
4466                 if (rc || (bytes_read == 0)) {
4467                         if (total_read) {
4468                                 break;
4469                         } else {
4470                                 free_xid(xid);
4471                                 return rc;
4472                         }
4473                 } else {
4474                         cifs_stats_bytes_read(tcon, total_read);
4475                         *offset += bytes_read;
4476                 }
4477         }
4478         free_xid(xid);
4479         return total_read;
4480 }
4481
4482 /*
4483  * If the page is mmap'ed into a process' page tables, then we need to make
4484  * sure that it doesn't change while being written back.
4485  */
4486 static vm_fault_t
4487 cifs_page_mkwrite(struct vm_fault *vmf)
4488 {
4489         struct page *page = vmf->page;
4490
4491         /* Wait for the page to be written to the cache before we allow it to
4492          * be modified.  We then assume the entire page will need writing back.
4493          */
4494 #ifdef CONFIG_CIFS_FSCACHE
4495         if (PageFsCache(page) &&
4496             wait_on_page_fscache_killable(page) < 0)
4497                 return VM_FAULT_RETRY;
4498 #endif
4499
4500         wait_on_page_writeback(page);
4501
4502         if (lock_page_killable(page) < 0)
4503                 return VM_FAULT_RETRY;
4504         return VM_FAULT_LOCKED;
4505 }
4506
4507 static const struct vm_operations_struct cifs_file_vm_ops = {
4508         .fault = filemap_fault,
4509         .map_pages = filemap_map_pages,
4510         .page_mkwrite = cifs_page_mkwrite,
4511 };
4512
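/*
 * Both mmap variants below install cifs_file_vm_ops so that writes
 * through a shared mapping hit cifs_page_mkwrite() before a page is
 * dirtied; the strict variant additionally zaps the page cache when no
 * read oplock is held, while the plain variant revalidates the file
 * first.
 */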
4513 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4514 {
4515         int xid, rc = 0;
4516         struct inode *inode = file_inode(file);
4517
4518         xid = get_xid();
4519
4520         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4521                 rc = cifs_zap_mapping(inode);
4522         if (!rc)
4523                 rc = generic_file_mmap(file, vma);
4524         if (!rc)
4525                 vma->vm_ops = &cifs_file_vm_ops;
4526
4527         free_xid(xid);
4528         return rc;
4529 }
4530
4531 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4532 {
4533         int rc, xid;
4534
4535         xid = get_xid();
4536
4537         rc = cifs_revalidate_file(file);
4538         if (rc)
4539                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4540                          rc);
4541         if (!rc)
4542                 rc = generic_file_mmap(file, vma);
4543         if (!rc)
4544                 vma->vm_ops = &cifs_file_vm_ops;
4545
4546         free_xid(xid);
4547         return rc;
4548 }
4549
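/*
 * Work item run when an async readahead request completes: mark each
 * page uptodate (or in error), hand the successfully read pages to
 * fscache, then unlock and release them.
 */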
4550 static void
4551 cifs_readv_complete(struct work_struct *work)
4552 {
4553         unsigned int i, got_bytes;
4554         struct cifs_readdata *rdata = container_of(work,
4555                                                 struct cifs_readdata, work);
4556
4557         got_bytes = rdata->got_bytes;
4558         for (i = 0; i < rdata->nr_pages; i++) {
4559                 struct page *page = rdata->pages[i];
4560
4561                 if (rdata->result == 0 ||
4562                     (rdata->result == -EAGAIN && got_bytes)) {
4563                         flush_dcache_page(page);
4564                         SetPageUptodate(page);
4565                 } else
4566                         SetPageError(page);
4567
4568                 if (rdata->result == 0 ||
4569                     (rdata->result == -EAGAIN && got_bytes))
4570                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4571
4572                 unlock_page(page);
4573
4574                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4575
4576                 put_page(page);
4577                 rdata->pages[i] = NULL;
4578         }
4579         kref_put(&rdata->refcount, cifs_readdata_release);
4580 }
4581
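/*
 * Fill the rdata page array with the response payload, copying from the
 * supplied iterator when one is given (e.g. already-decrypted data) and
 * reading straight from the socket otherwise; with SMB Direct the RDMA
 * transfer has already placed the data. Short data zero-fills the tail
 * page, and pages beyond the server's likely EOF are zeroed, marked
 * uptodate, and dropped from the rdata.
 */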
4582 static int
4583 readpages_fill_pages(struct TCP_Server_Info *server,
4584                      struct cifs_readdata *rdata, struct iov_iter *iter,
4585                      unsigned int len)
4586 {
4587         int result = 0;
4588         unsigned int i;
4589         u64 eof;
4590         pgoff_t eof_index;
4591         unsigned int nr_pages = rdata->nr_pages;
4592         unsigned int page_offset = rdata->page_offset;
4593
4594         /* determine the eof that the server (probably) has */
4595         eof = CIFS_I(rdata->mapping->host)->server_eof;
4596         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4597         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4598
4599         rdata->got_bytes = 0;
4600         rdata->tailsz = PAGE_SIZE;
4601         for (i = 0; i < nr_pages; i++) {
4602                 struct page *page = rdata->pages[i];
4603                 unsigned int to_read = rdata->pagesz;
4604                 size_t n;
4605
4606                 if (i == 0)
4607                         to_read -= page_offset;
4608                 else
4609                         page_offset = 0;
4610
4611                 n = to_read;
4612
4613                 if (len >= to_read) {
4614                         len -= to_read;
4615                 } else if (len > 0) {
4616                         /* enough for partial page, fill and zero the rest */
4617                         zero_user(page, len + page_offset, to_read - len);
4618                         n = rdata->tailsz = len;
4619                         len = 0;
4620                 } else if (page->index > eof_index) {
4621                         /*
4622                          * The VFS will not try to do readahead past the
4623                          * i_size, but it's possible that we have outstanding
4624                          * writes with gaps in the middle and the i_size hasn't
4625                          * caught up yet. Populate those with zeroed out pages
4626                          * to prevent the VFS from repeatedly attempting to
4627                          * fill them until the writes are flushed.
4628                          */
4629                         zero_user(page, 0, PAGE_SIZE);
4630                         flush_dcache_page(page);
4631                         SetPageUptodate(page);
4632                         unlock_page(page);
4633                         put_page(page);
4634                         rdata->pages[i] = NULL;
4635                         rdata->nr_pages--;
4636                         continue;
4637                 } else {
4638                         /* no need to hold page hostage */
4639                         unlock_page(page);
4640                         put_page(page);
4641                         rdata->pages[i] = NULL;
4642                         rdata->nr_pages--;
4643                         continue;
4644                 }
4645
4646                 if (iter)
4647                         result = copy_page_from_iter(
4648                                         page, page_offset, n, iter);
4649 #ifdef CONFIG_CIFS_SMB_DIRECT
4650                 else if (rdata->mr)
4651                         result = n;
4652 #endif
4653                 else
4654                         result = cifs_read_page_from_socket(
4655                                         server, page, page_offset, n);
4656                 if (result < 0)
4657                         break;
4658
4659                 rdata->got_bytes += result;
4660         }
4661
4662         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
4663                                                 rdata->got_bytes : result;
4664 }
4665
4666 static int
4667 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4668                                struct cifs_readdata *rdata, unsigned int len)
4669 {
4670         return readpages_fill_pages(server, rdata, NULL, len);
4671 }
4672
4673 static int
4674 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4675                                struct cifs_readdata *rdata,
4676                                struct iov_iter *iter)
4677 {
4678         return readpages_fill_pages(server, rdata, iter, iter->count);
4679 }
4680
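/*
 * Readahead entry point: serve whatever part of the window fscache
 * already holds, then chop the remainder into rsize-sized chunks, each
 * dispatched as an async read under its own credit reservation.
 */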
4681 static void cifs_readahead(struct readahead_control *ractl)
4682 {
4683         int rc;
4684         struct cifsFileInfo *open_file = ractl->file->private_data;
4685         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4686         struct TCP_Server_Info *server;
4687         pid_t pid;
4688         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4689         pgoff_t next_cached = ULONG_MAX;
4690         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4691                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4692         bool check_cache = caching;
4693
4694         xid = get_xid();
4695
4696         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4697                 pid = open_file->pid;
4698         else
4699                 pid = current->tgid;
4700
4701         rc = 0;
4702         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4703
4704         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4705                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4706
4707         /*
4708          * Chop the readahead request up into rsize-sized read requests.
4709          */
4710         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4711                 unsigned int i, got, rsize;
4712                 struct page *page;
4713                 struct cifs_readdata *rdata;
4714                 struct cifs_credits credits_on_stack;
4715                 struct cifs_credits *credits = &credits_on_stack;
4716                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4717
4718                 /*
4719                  * Find out if we have anything cached in the range of
4720                  * interest, and if so, where the next chunk of cached data is.
4721                  */
4722                 if (caching) {
4723                         if (check_cache) {
4724                                 rc = cifs_fscache_query_occupancy(
4725                                         ractl->mapping->host, index, nr_pages,
4726                                         &next_cached, &cache_nr_pages);
4727                                 if (rc < 0)
4728                                         caching = false;
4729                                 check_cache = false;
4730                         }
4731
4732                         if (index == next_cached) {
4733                                 /*
4734                                  * TODO: Send a whole batch of pages to be read
4735                                  * by the cache.
4736                                  */
4737                                 struct folio *folio = readahead_folio(ractl);
4738
4739                                 last_batch_size = folio_nr_pages(folio);
4740                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4741                                                                &folio->page) < 0) {
4742                                         /*
4743                                          * TODO: Deal with cache read failure
4744                                          * here, but for the moment, delegate
4745                                          * that to readpage.
4746                                          */
4747                                         caching = false;
4748                                 }
4749                                 folio_unlock(folio);
4750                                 next_cached++;
4751                                 cache_nr_pages--;
4752                                 if (cache_nr_pages == 0)
4753                                         check_cache = true;
4754                                 continue;
4755                         }
4756                 }
4757
4758                 if (open_file->invalidHandle) {
4759                         rc = cifs_reopen_file(open_file, true);
4760                         if (rc) {
4761                                 if (rc == -EAGAIN)
4762                                         continue;
4763                                 break;
4764                         }
4765                 }
4766
4767                 if (cifs_sb->ctx->rsize == 0)
4768                         cifs_sb->ctx->rsize =
4769                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4770                                                              cifs_sb->ctx);
4771
4772                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4773                                                    &rsize, credits);
4774                 if (rc)
4775                         break;
4776                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4777                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
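                /*
                 * e.g. a granted rsize of 1MiB with 4KiB pages caps a chunk
                 * at 256 pages, further clipped so it stops short of the
                 * next cached region.
                 */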
4778
4779                 /*
4780                  * Give up immediately if rsize is too small to read an
4781                  * entire page; the VFS will fall back to readpage. We
4782                  * should never reach this point, however, since we set
4783                  * ra_pages to 0 when the rsize is smaller than a cache page.
4784                  */
4785                 if (unlikely(!nr_pages)) {
4786                         add_credits_and_wake_if(server, credits, 0);
4787                         break;
4788                 }
4789
4790                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4791                 if (!rdata) {
4792                         /* best to give up if we're out of mem */
4793                         add_credits_and_wake_if(server, credits, 0);
4794                         break;
4795                 }
4796
4797                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4798                 if (got != nr_pages) {
4799                         pr_warn("__readahead_batch() returned %u/%u\n",
4800                                 got, nr_pages);
4801                         nr_pages = got;
4802                 }
4803
4804                 rdata->nr_pages = nr_pages;
4805                 rdata->bytes    = readahead_batch_length(ractl);
4806                 rdata->cfile    = cifsFileInfo_get(open_file);
4807                 rdata->server   = server;
4808                 rdata->mapping  = ractl->mapping;
4809                 rdata->offset   = readahead_pos(ractl);
4810                 rdata->pid      = pid;
4811                 rdata->pagesz   = PAGE_SIZE;
4812                 rdata->tailsz   = PAGE_SIZE;
4813                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4814                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4815                 rdata->credits  = credits_on_stack;
4816
4817                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4818                 if (!rc) {
4819                         if (rdata->cfile->invalidHandle)
4820                                 rc = -EAGAIN;
4821                         else
4822                                 rc = server->ops->async_readv(rdata);
4823                 }
4824
4825                 if (rc) {
4826                         add_credits_and_wake_if(server, &rdata->credits, 0);
4827                         for (i = 0; i < rdata->nr_pages; i++) {
4828                                 page = rdata->pages[i];
4829                                 unlock_page(page);
4830                                 put_page(page);
4831                         }
4832                         /* Fall back to readpage in error/reconnect cases */
4833                         kref_put(&rdata->refcount, cifs_readdata_release);
4834                         break;
4835                 }
4836
4837                 kref_put(&rdata->refcount, cifs_readdata_release);
4838                 last_batch_size = nr_pages;
4839         }
4840
4841         free_xid(xid);
4842 }
4843
4844 /*
4845  * cifs_readpage_worker must be called with the page pinned
4846  */
4847 static int cifs_readpage_worker(struct file *file, struct page *page,
4848         loff_t *poffset)
4849 {
4850         char *read_data;
4851         int rc;
4852
4853         /* Is the page cached? */
4854         rc = cifs_readpage_from_fscache(file_inode(file), page);
4855         if (rc == 0)
4856                 goto read_complete;
4857
4858         read_data = kmap(page);
4859         /* for reads over a certain size we could initiate async read ahead */
4860
4861         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4862
4863         if (rc < 0)
4864                 goto io_error;
4865         else
4866                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4867
4868         /* we do not want atime to be less than mtime, as that broke some apps */
4869         file_inode(file)->i_atime = current_time(file_inode(file));
4870         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4871                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4872         else
4873                 file_inode(file)->i_atime = current_time(file_inode(file));
4874
4875         if (PAGE_SIZE > rc)
4876                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4877
4878         flush_dcache_page(page);
4879         SetPageUptodate(page);
4880
4881         /* send this page to the cache */
4882         cifs_readpage_to_fscache(file_inode(file), page);
4883
4884         rc = 0;
4885
4886 io_error:
4887         kunmap(page);
4888         unlock_page(page);
4889
4890 read_complete:
4891         return rc;
4892 }
4893
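/*
 * ->read_folio: try fscache first; on a miss, fall back to a
 * synchronous cifs_read() of the single page via cifs_readpage_worker().
 */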
4894 static int cifs_read_folio(struct file *file, struct folio *folio)
4895 {
4896         struct page *page = &folio->page;
4897         loff_t offset = page_file_offset(page);
4898         int rc = -EACCES;
4899         unsigned int xid;
4900
4901         xid = get_xid();
4902
4903         if (file->private_data == NULL) {
4904                 rc = -EBADF;
4905                 free_xid(xid);
4906                 return rc;
4907         }
4908
4909         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4910                  page, (int)offset, (int)offset);
4911
4912         rc = cifs_readpage_worker(file, page, &offset);
4913
4914         free_xid(xid);
4915         return rc;
4916 }
4917
4918 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4919 {
4920         struct cifsFileInfo *open_file;
4921
4922         spin_lock(&cifs_inode->open_file_lock);
4923         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4924                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4925                         spin_unlock(&cifs_inode->open_file_lock);
4926                         return 1;
4927                 }
4928         }
4929         spin_unlock(&cifs_inode->open_file_lock);
4930         return 0;
4931 }
4932
4933 /* We do not want to update the file size from the server for inodes
4934    open for write, to avoid races with writepage extending the file.
4935    In the future we could consider refreshing the inode only on
4936    increases in the file size, but this is tricky to do without
4937    racing with writebehind page caching in the current Linux
4938    kernel design */
4939 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4940 {
4941         if (!cifsInode)
4942                 return true;
4943
4944         if (is_inode_writable(cifsInode)) {
4945                 /* This inode is open for write at least once */
4946                 struct cifs_sb_info *cifs_sb;
4947
4948                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4949                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4950                         /* since there is no page cache to corrupt on
4951                            direct I/O we can change the size safely */
4952                         return true;
4953                 }
4954
4955                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4956                         return true;
4957
4958                 return false;
4959         } else
4960                 return true;
4961 }
4962
4963 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4964                         loff_t pos, unsigned len,
4965                         struct page **pagep, void **fsdata)
4966 {
4967         int oncethru = 0;
4968         pgoff_t index = pos >> PAGE_SHIFT;
4969         loff_t offset = pos & (PAGE_SIZE - 1);
4970         loff_t page_start = pos & PAGE_MASK;
4971         loff_t i_size;
4972         struct page *page;
4973         int rc = 0;
4974
4975         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4976
4977 start:
4978         page = grab_cache_page_write_begin(mapping, index);
4979         if (!page) {
4980                 rc = -ENOMEM;
4981                 goto out;
4982         }
4983
4984         if (PageUptodate(page))
4985                 goto out;
4986
4987         /*
4988          * If we write a full page it will be up to date, no need to read from
4989          * the server. If the write is short, we'll end up doing a sync write
4990          * instead.
4991          */
4992         if (len == PAGE_SIZE)
4993                 goto out;
4994
4995         /*
4996          * optimize away the read when we have an oplock, and we're not
4997          * expecting to use any of the data we'd be reading in. That
4998          * is, when the page lies beyond the EOF, or straddles the EOF
4999          * and the write will cover all of the existing data.
5000          */
5001         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
5002                 i_size = i_size_read(mapping->host);
5003                 if (page_start >= i_size ||
5004                     (offset == 0 && (pos + len) >= i_size)) {
5005                         zero_user_segments(page, 0, offset,
5006                                            offset + len,
5007                                            PAGE_SIZE);
5008                         /*
5009                          * PageChecked means that the parts of the page
5010                          * to which we're not writing are considered up
5011                          * to date. Once the data is copied to the
5012                          * page, it can be set uptodate.
5013                          */
5014                         SetPageChecked(page);
5015                         goto out;
5016                 }
5017         }
5018
5019         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
5020                 /*
5021                  * might as well read a page, it is fast enough. If we get
5022                  * an error, we don't need to return it. cifs_write_end will
5023                  * do a sync write instead since PG_uptodate isn't set.
5024                  */
5025                 cifs_readpage_worker(file, page, &page_start);
5026                 put_page(page);
5027                 oncethru = 1;
5028                 goto start;
5029         } else {
5030                 /* we could try using another file handle if there is one -
5031                    but how would we lock it to prevent a close of that
5032                    handle racing with this read? In any case this will be
5033                    written out by write_end so it is fine */
5034         }
5035 out:
5036         *pagep = page;
5037         return rc;
5038 }
5039
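/*
 * ->release_folio: refuse while the folio has private data, and avoid
 * blocking on an in-flight fscache write from kswapd or any allocation
 * context that cannot enter the filesystem.
 */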
5040 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
5041 {
5042         if (folio_test_private(folio))
5043                 return false;
5044         if (folio_test_fscache(folio)) {
5045                 if (current_is_kswapd() || !(gfp & __GFP_FS))
5046                         return false;
5047                 folio_wait_fscache(folio);
5048         }
5049         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
5050         return true;
5051 }
5052
5053 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
5054                                  size_t length)
5055 {
5056         folio_wait_fscache(folio);
5057 }
5058
5059 static int cifs_launder_folio(struct folio *folio)
5060 {
5061         int rc = 0;
5062         loff_t range_start = folio_pos(folio);
5063         loff_t range_end = range_start + folio_size(folio);
5064         struct writeback_control wbc = {
5065                 .sync_mode = WB_SYNC_ALL,
5066                 .nr_to_write = 0,
5067                 .range_start = range_start,
5068                 .range_end = range_end,
5069         };
5070
5071         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5072
5073         if (folio_clear_dirty_for_io(folio))
5074                 rc = cifs_writepage_locked(&folio->page, &wbc);
5075
5076         folio_wait_fscache(folio);
5077         return rc;
5078 }
5079
5080 void cifs_oplock_break(struct work_struct *work)
5081 {
5082         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5083                                                   oplock_break);
5084         struct inode *inode = d_inode(cfile->dentry);
5085         struct cifsInodeInfo *cinode = CIFS_I(inode);
5086         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
5087         struct TCP_Server_Info *server = tcon->ses->server;
5088         int rc = 0;
5089         bool purge_cache = false;
5090
5091         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5092                         TASK_UNINTERRUPTIBLE);
5093
5094         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5095                                       cfile->oplock_epoch, &purge_cache);
5096
5097         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5098                                                 cifs_has_mand_locks(cinode)) {
5099                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5100                          inode);
5101                 cinode->oplock = 0;
5102         }
5103
5104         if (inode && S_ISREG(inode->i_mode)) {
5105                 if (CIFS_CACHE_READ(cinode))
5106                         break_lease(inode, O_RDONLY);
5107                 else
5108                         break_lease(inode, O_WRONLY);
5109                 rc = filemap_fdatawrite(inode->i_mapping);
5110                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5111                         rc = filemap_fdatawait(inode->i_mapping);
5112                         mapping_set_error(inode->i_mapping, rc);
5113                         cifs_zap_mapping(inode);
5114                 }
5115                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5116                 if (CIFS_CACHE_WRITE(cinode))
5117                         goto oplock_break_ack;
5118         }
5119
5120         rc = cifs_push_locks(cfile);
5121         if (rc)
5122                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5123
5124 oplock_break_ack:
5125         /*
5126          * Releasing a stale oplock after a recent reconnect of the SMB
5127          * session, using a now incorrect file handle, is not a data
5128          * integrity issue; but don't bother sending an oplock release if
5129          * the session is still disconnected - the server already released it.
5130          */
5131         if (!cfile->oplock_break_cancelled) {
5132                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
5133                                                              cinode);
5134                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5135         }
5136
5137         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
5138         cifs_done_oplock_break(cinode);
5139 }
5140
5141 /*
5142  * The presence of cifs_direct_io() in the address space ops vector
5143  * allows open() O_DIRECT flags which would have failed otherwise.
5144  *
5145  * In the non-cached mode (mount with cache=none), we shunt off direct
5146  * read and write requests so this method should never be called.
5147  *
5148  * Direct I/O is not yet supported in the cached mode.
5149  */
5150 static ssize_t
5151 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5152 {
5153         /*
5154          * FIXME
5155          * Eventually we need to support direct I/O for non-forcedirectio mounts
5156          */
5157         return -EINVAL;
5158 }
5159
5160 static int cifs_swap_activate(struct swap_info_struct *sis,
5161                               struct file *swap_file, sector_t *span)
5162 {
5163         struct cifsFileInfo *cfile = swap_file->private_data;
5164         struct inode *inode = swap_file->f_mapping->host;
5165         unsigned long blocks;
5166         long long isize;
5167
5168         cifs_dbg(FYI, "swap activate\n");
5169
5170         if (!swap_file->f_mapping->a_ops->swap_rw)
5171                 /* Cannot support swap */
5172                 return -EINVAL;
5173
5174         spin_lock(&inode->i_lock);
5175         blocks = inode->i_blocks;
5176         isize = inode->i_size;
5177         spin_unlock(&inode->i_lock);
5178         if (blocks*512 < isize) {
5179                 pr_warn("swap activate: swapfile has holes\n");
5180                 return -EINVAL;
5181         }
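        /*
         * e.g. an 8MiB swapfile must have at least 16384 512-byte blocks
         * allocated; anything less means the file has holes.
         */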
5182         *span = sis->pages;
5183
5184         pr_warn_once("Swap support over SMB3 is experimental\n");
5185
5186         /*
5187          * TODO: consider adding ACL (or documenting how) to prevent other
5188          * users (on this or other systems) from reading it
5189          */
5190
5191
5192         /* TODO: add sk_set_memalloc(inet) or similar */
5193
5194         if (cfile)
5195                 cfile->swapfile = true;
5196         /*
5197          * TODO: Since file already open, we can't open with DENY_ALL here
5198          * but we could add call to grab a byte range lock to prevent others
5199          * from reading or writing the file
5200          */
5201
5202         sis->flags |= SWP_FS_OPS;
5203         return add_swap_extent(sis, 0, sis->max, 0);
5204 }
5205
5206 static void cifs_swap_deactivate(struct file *file)
5207 {
5208         struct cifsFileInfo *cfile = file->private_data;
5209
5210         cifs_dbg(FYI, "swap deactivate\n");
5211
5212         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5213
5214         if (cfile)
5215                 cfile->swapfile = false;
5216
5217         /* do we need to unpin (or unlock) the file? */
5218 }
5219
5220 /*
5221  * Mark a page as having been made dirty and thus needing writeback.  We also
5222  * need to pin the cache object to write back to.
5223  */
5224 #ifdef CONFIG_CIFS_FSCACHE
5225 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5226 {
5227         return fscache_dirty_folio(mapping, folio,
5228                                         cifs_inode_cookie(mapping->host));
5229 }
5230 #else
5231 #define cifs_dirty_folio filemap_dirty_folio
5232 #endif
5233
5234 const struct address_space_operations cifs_addr_ops = {
5235         .read_folio = cifs_read_folio,
5236         .readahead = cifs_readahead,
5237         .writepage = cifs_writepage,
5238         .writepages = cifs_writepages,
5239         .write_begin = cifs_write_begin,
5240         .write_end = cifs_write_end,
5241         .dirty_folio = cifs_dirty_folio,
5242         .release_folio = cifs_release_folio,
5243         .direct_IO = cifs_direct_io,
5244         .invalidate_folio = cifs_invalidate_folio,
5245         .launder_folio = cifs_launder_folio,
5246         /*
5247          * TODO: investigate, and if useful, add a cifs_migratePage
5248          * helper (under CONFIG_MIGRATION) in the future, and also
5249          * investigate and add an is_dirty_writeback helper if needed
5250          */
5251         .swap_activate = cifs_swap_activate,
5252         .swap_deactivate = cifs_swap_deactivate,
5253 };
5254
5255 /*
5256  * cifs_readahead requires the server to support a buffer large enough to
5257  * contain the header plus one complete page of data.  Otherwise, we need
5258  * to leave cifs_readahead out of the address space operations.
5259  */
5260 const struct address_space_operations cifs_addr_ops_smallbuf = {
5261         .read_folio = cifs_read_folio,
5262         .writepage = cifs_writepage,
5263         .writepages = cifs_writepages,
5264         .write_begin = cifs_write_begin,
5265         .write_end = cifs_write_end,
5266         .dirty_folio = cifs_dirty_folio,
5267         .release_folio = cifs_release_folio,
5268         .invalidate_folio = cifs_invalidate_folio,
5269         .launder_folio = cifs_launder_folio,
5270 };