// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
                if (xas_retry(&xas, folio))
                        continue;
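                /*
                 * folio_lock() can sleep, so pause the xarray walk and drop
                 * the RCU read lock before taking it, then resume the walk
                 * afterwards.
                 */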
                xas_pause(&xas);
                rcu_read_unlock();
                folio_lock(folio);
                folio_clear_dirty_for_io(folio);
                folio_unlock(folio);
                rcu_read_lock();
        }

        rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                if (xas_retry(&xas, folio))
                        continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio_index(folio), end);
                        continue;
                }

                folio_detach_private(folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                if (xas_retry(&xas, folio))
                        continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio_index(folio), end);
                        continue;
                }

                folio_set_error(folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                /* Skip xarray retry entries, as the sibling helpers above do */
                if (xas_retry(&xas, folio))
                        continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio_index(folio), end);
                        continue;
                }

                filemap_dirty_folio(folio->mapping, folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Mark all open files on the tree connection as invalid, since they
 * were closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->tc_lock);
        if (tcon->status != TID_NEED_RECON) {
                spin_unlock(&tcon->tc_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->tc_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}

static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
                        struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that no POSIX flag combination maps directly to the
 *      FILE_SUPERSEDE disposition (ie create whether or not the file
 *      exists). O_CREAT | O_TRUNC is similar, but it truncates an
 *      existing file rather than replacing it with a new one (created
 *      from the attributes / metadata passed in on the open call) as
 *      FILE_SUPERSEDE does.
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = fid,
        };

        rc = server->ops->open(xid, &oparms, oplock, buf);
        if (rc)
                return rc;

        /* TODO: Add support for calling posix query info, passing in the fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

        return rc;
}

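/*
 * Return true if any of the fids open on this inode currently holds
 * byte-range locks.
 */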
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

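/*
 * Take the write side of a rw_semaphore by polling, sleeping briefly
 * between attempts rather than blocking in down_write().
 */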
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                                       struct tcon_link *tlink, __u32 oplock,
                                       const char *symlink_target)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        if (symlink_target) {
                cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
                if (!cfile->symlink_target) {
                        kfree(fdlocks);
                        kfree(cfile);
                        return NULL;
                }
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
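        /*
         * An oplock or lease break may have arrived for this handle while
         * the open was in flight; if so, the pending_open entry carries
         * the updated oplock level to apply here.
         */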
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file->symlink_target);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, offload the final release to a workqueue;
 *              the close and oplock break paths pass false
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

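        /*
         * Take all three locks so that no other thread can find this file
         * via the tcon, inode or fid lists between the refcount dropping
         * to zero and the file being unlinked from those lists.
         */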
        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close, because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        struct cifs_open_info_data data = {};

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

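        /*
         * Register this open on the pending-opens list first, so that a
         * lease break arriving while the open is in flight is not missed.
         */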
        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
                                  xid, &data);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            ((file->f_flags & O_ACCMODE) == O_RDONLY ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        cifs_free_open_info(&data);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here: various ops, including some that
         * already hold it, can end up causing writepage to be called, and if
         * the server was down that means we end up here. We can never tell
         * whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = &cfile->fid,
                .reconnect = true,
        };

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush it; and since we do not know whether
         * we have data that would invalidate the current end of file on the
         * server, we can not go to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

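/*
 * Workqueue handler that performs the actual, deferred close of a file
 * handle once the deferred-close timeout has expired.
 */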
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
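                /*
                 * If we hold a strong (RHW) lease, defer sending the close
                 * to the server so that a quick re-open of the same file can
                 * reuse the handle rather than needing a fresh round trip.
                 */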
                if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
                    && cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode_set_mtime_to_ts(inode,
                                                      inode_set_ctime_current(inode));
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If mod_delayed_work() queued new work (i.e.
                                 * none was pending), take an extra reference
                                 * to avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /*
         * Collect all invalid handles open on the tree connection under the
         * lock, then reopen them after dropping it, since cifs_reopen_file()
         * can block.
         */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - lock op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
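                /*
                 * Queue this lock behind the conflicting one and wait until
                 * cifs_del_lock_waiters() unlinks us (the blist node becomes
                 * empty again), then retry the whole check.
                 */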
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}
1445
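/*
 * Push all cached byte-range locks (mandatory style) on this file to the
 * server. Locks are packed into LOCKING_ANDX_RANGE arrays sized to the
 * server's maximum buffer (capped at one page), one pass for exclusive
 * locks and one for shared locks; the first error seen is returned but
 * the remaining batches are still sent.
 */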
1446 int
1447 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1448 {
1449         unsigned int xid;
1450         int rc = 0, stored_rc;
1451         struct cifsLockInfo *li, *tmp;
1452         struct cifs_tcon *tcon;
1453         unsigned int num, max_num, max_buf;
1454         LOCKING_ANDX_RANGE *buf, *cur;
1455         static const int types[] = {
1456                 LOCKING_ANDX_LARGE_FILES,
1457                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1458         };
1459         int i;
1460
1461         xid = get_xid();
1462         tcon = tlink_tcon(cfile->tlink);
1463
1464         /*
1465          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1466          * and check it before using.
1467          */
1468         max_buf = tcon->ses->server->maxBuf;
1469         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1470                 free_xid(xid);
1471                 return -EINVAL;
1472         }
1473
1474         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1475                      PAGE_SIZE);
1476         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1477                         PAGE_SIZE);
1478         max_num = (max_buf - sizeof(struct smb_hdr)) /
1479                                                 sizeof(LOCKING_ANDX_RANGE);
1480         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1481         if (!buf) {
1482                 free_xid(xid);
1483                 return -ENOMEM;
1484         }
1485
1486         for (i = 0; i < 2; i++) {
1487                 cur = buf;
1488                 num = 0;
1489                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1490                         if (li->type != types[i])
1491                                 continue;
1492                         cur->Pid = cpu_to_le16(li->pid);
1493                         cur->LengthLow = cpu_to_le32((u32)li->length);
1494                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1495                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1496                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1497                         if (++num == max_num) {
1498                                 stored_rc = cifs_lockv(xid, tcon,
1499                                                        cfile->fid.netfid,
1500                                                        (__u8)li->type, 0, num,
1501                                                        buf);
1502                                 if (stored_rc)
1503                                         rc = stored_rc;
1504                                 cur = buf;
1505                                 num = 0;
1506                         } else
1507                                 cur++;
1508                 }
1509
1510                 if (num) {
1511                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1512                                                (__u8)types[i], 0, num, buf);
1513                         if (stored_rc)
1514                                 rc = stored_rc;
1515                 }
1516         }
1517
1518         kfree(buf);
1519         free_xid(xid);
1520         return rc;
1521 }
1522
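/*
 * Derive the 32-bit lock "pid" sent over the wire from a lock owner,
 * mixing in cifs_lock_secret so that raw kernel pointer values never
 * appear on the wire.
 */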
1523 static __u32
1524 hash_lockowner(fl_owner_t owner)
1525 {
1526         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1527 }
1528 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1529
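/*
 * Snapshot of one POSIX lock, copied out under flc_lock so that the
 * corresponding server request can be issued after the spinlock has
 * been dropped.
 */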
1530 struct lock_to_push {
1531         struct list_head llist;
1532         __u64 offset;
1533         __u64 length;
1534         __u32 pid;
1535         __u16 netfid;
1536         __u8 type;
1537 };
1538
1539 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
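/*
 * Push the inode's cached POSIX locks to the server: count the locks
 * under flc_lock, preallocate lock_to_push entries (no new FL_POSIX lock
 * can appear while the caller holds lock_sem), copy the lock properties
 * under the spinlock, then issue the requests once it is released.
 */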
1540 static int
1541 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1542 {
1543         struct inode *inode = d_inode(cfile->dentry);
1544         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1545         struct file_lock *flock;
1546         struct file_lock_context *flctx = locks_inode_context(inode);
1547         unsigned int count = 0, i;
1548         int rc = 0, xid, type;
1549         struct list_head locks_to_send, *el;
1550         struct lock_to_push *lck, *tmp;
1551         __u64 length;
1552
1553         xid = get_xid();
1554
1555         if (!flctx)
1556                 goto out;
1557
1558         spin_lock(&flctx->flc_lock);
1559         list_for_each(el, &flctx->flc_posix) {
1560                 count++;
1561         }
1562         spin_unlock(&flctx->flc_lock);
1563
1564         INIT_LIST_HEAD(&locks_to_send);
1565
1566         /*
1567          * Allocating count locks is enough because no FL_POSIX locks can be
1568          * added to the list while we are holding cinode->lock_sem that
1569          * protects locking operations of this inode.
1570          */
1571         for (i = 0; i < count; i++) {
1572                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1573                 if (!lck) {
1574                         rc = -ENOMEM;
1575                         goto err_out;
1576                 }
1577                 list_add_tail(&lck->llist, &locks_to_send);
1578         }
1579
1580         el = locks_to_send.next;
1581         spin_lock(&flctx->flc_lock);
1582         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1583                 if (el == &locks_to_send) {
1584                         /*
1585                          * The list ended. We don't have enough allocated
1586                          * structures - something is really wrong.
1587                          */
1588                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1589                         break;
1590                 }
1591                 length = cifs_flock_len(flock);
1592                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1593                         type = CIFS_RDLCK;
1594                 else
1595                         type = CIFS_WRLCK;
1596                 lck = list_entry(el, struct lock_to_push, llist);
1597                 lck->pid = hash_lockowner(flock->fl_owner);
1598                 lck->netfid = cfile->fid.netfid;
1599                 lck->length = length;
1600                 lck->type = type;
1601                 lck->offset = flock->fl_start;
1602         }
1603         spin_unlock(&flctx->flc_lock);
1604
1605         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1606                 int stored_rc;
1607
1608                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1609                                              lck->offset, lck->length, NULL,
1610                                              lck->type, 0);
1611                 if (stored_rc)
1612                         rc = stored_rc;
1613                 list_del(&lck->llist);
1614                 kfree(lck);
1615         }
1616
1617 out:
1618         free_xid(xid);
1619         return rc;
1620 err_out:
1621         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1622                 list_del(&lck->llist);
1623                 kfree(lck);
1624         }
1625         goto out;
1626 }
1627 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1628
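/*
 * Push all cached byte-range locks on this file to the server - POSIX
 * style if the mount supports it, mandatory style otherwise - and stop
 * caching brlocks from this point on.
 */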
1629 static int
1630 cifs_push_locks(struct cifsFileInfo *cfile)
1631 {
1632         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1633         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1634         int rc = 0;
1635 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1636         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1637 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1638
1639         /* we are going to update can_cache_brlcks here - we need write access */
1640         cifs_down_write(&cinode->lock_sem);
1641         if (!cinode->can_cache_brlcks) {
1642                 up_write(&cinode->lock_sem);
1643                 return rc;
1644         }
1645
1646 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1647         if (cap_unix(tcon->ses) &&
1648             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1649             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1650                 rc = cifs_push_posix_locks(cfile);
1651         else
1652 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1653                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1654
1655         cinode->can_cache_brlcks = false;
1656         up_write(&cinode->lock_sem);
1657         return rc;
1658 }
1659
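/*
 * Decode a VFS file_lock into the on-the-wire lock type and tell the
 * caller whether the request locks or unlocks a range and whether we
 * are allowed to block waiting for it.
 */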
1660 static void
1661 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1662                 bool *wait_flag, struct TCP_Server_Info *server)
1663 {
1664         if (flock->fl_flags & FL_POSIX)
1665                 cifs_dbg(FYI, "Posix\n");
1666         if (flock->fl_flags & FL_FLOCK)
1667                 cifs_dbg(FYI, "Flock\n");
1668         if (flock->fl_flags & FL_SLEEP) {
1669                 cifs_dbg(FYI, "Blocking lock\n");
1670                 *wait_flag = true;
1671         }
1672         if (flock->fl_flags & FL_ACCESS)
1673                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1674         if (flock->fl_flags & FL_LEASE)
1675                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1676         if (flock->fl_flags &
1677             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1678                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1679                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1680
1681         *type = server->vals->large_lock_type;
1682         if (flock->fl_type == F_WRLCK) {
1683                 cifs_dbg(FYI, "F_WRLCK\n");
1684                 *type |= server->vals->exclusive_lock_type;
1685                 *lock = 1;
1686         } else if (flock->fl_type == F_UNLCK) {
1687                 cifs_dbg(FYI, "F_UNLCK\n");
1688                 *type |= server->vals->unlock_lock_type;
1689                 *unlock = 1;
1690                 /* Check if unlock includes more than one lock range */
1691         } else if (flock->fl_type == F_RDLCK) {
1692                 cifs_dbg(FYI, "F_RDLCK\n");
1693                 *type |= server->vals->shared_lock_type;
1694                 *lock = 1;
1695         } else if (flock->fl_type == F_EXLCK) {
1696                 cifs_dbg(FYI, "F_EXLCK\n");
1697                 *type |= server->vals->exclusive_lock_type;
1698                 *lock = 1;
1699         } else if (flock->fl_type == F_SHLCK) {
1700                 cifs_dbg(FYI, "F_SHLCK\n");
1701                 *type |= server->vals->shared_lock_type;
1702                 *lock = 1;
1703         } else
1704                 cifs_dbg(FYI, "Unknown type of lock\n");
1705 }
1706
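/*
 * Handle F_GETLK. Check the locally cached locks first; if the server
 * must be consulted, probe the range by briefly acquiring and releasing
 * it there and set flock->fl_type to reflect whether the range is free,
 * read-locked or write-locked.
 */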
1707 static int
1708 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1709            bool wait_flag, bool posix_lck, unsigned int xid)
1710 {
1711         int rc = 0;
1712         __u64 length = cifs_flock_len(flock);
1713         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1714         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1715         struct TCP_Server_Info *server = tcon->ses->server;
1716 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1717         __u16 netfid = cfile->fid.netfid;
1718
1719         if (posix_lck) {
1720                 int posix_lock_type;
1721
1722                 rc = cifs_posix_lock_test(file, flock);
1723                 if (!rc)
1724                         return rc;
1725
1726                 if (type & server->vals->shared_lock_type)
1727                         posix_lock_type = CIFS_RDLCK;
1728                 else
1729                         posix_lock_type = CIFS_WRLCK;
1730                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1731                                       hash_lockowner(flock->fl_owner),
1732                                       flock->fl_start, length, flock,
1733                                       posix_lock_type, wait_flag);
1734                 return rc;
1735         }
1736 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1737
1738         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1739         if (!rc)
1740                 return rc;
1741
1742         /* BB we could chain these into one lock request BB */
1743         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1744                                     1, 0, false);
1745         if (rc == 0) {
1746                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1747                                             type, 0, 1, false);
1748                 flock->fl_type = F_UNLCK;
1749                 if (rc != 0)
1750                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1751                                  rc);
1752                 return 0;
1753         }
1754
1755         if (type & server->vals->shared_lock_type) {
1756                 flock->fl_type = F_WRLCK;
1757                 return 0;
1758         }
1759
1760         type &= ~server->vals->exclusive_lock_type;
1761
1762         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1763                                     type | server->vals->shared_lock_type,
1764                                     1, 0, false);
1765         if (rc == 0) {
1766                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1767                         type | server->vals->shared_lock_type, 0, 1, false);
1768                 flock->fl_type = F_RDLCK;
1769                 if (rc != 0)
1770                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1771                                  rc);
1772         } else
1773                 flock->fl_type = F_WRLCK;
1774
1775         return 0;
1776 }
1777
1778 void
1779 cifs_move_llist(struct list_head *source, struct list_head *dest)
1780 {
1781         struct list_head *li, *tmp;
1782         list_for_each_safe(li, tmp, source)
1783                 list_move(li, dest);
1784 }
1785
1786 void
1787 cifs_free_llist(struct list_head *llist)
1788 {
1789         struct cifsLockInfo *li, *tmp;
1790         list_for_each_entry_safe(li, tmp, llist, llist) {
1791                 cifs_del_lock_waiters(li);
1792                 list_del(&li->llist);
1793                 kfree(li);
1794         }
1795 }
1796
1797 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
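/*
 * Unlock a byte range (mandatory style): walk the cached lock list,
 * batch every lock that lies within the range into unlock requests, and
 * park the entries on a temporary list so they can be restored to the
 * file's list if the server rejects the unlock.
 */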
1798 int
1799 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1800                   unsigned int xid)
1801 {
1802         int rc = 0, stored_rc;
1803         static const int types[] = {
1804                 LOCKING_ANDX_LARGE_FILES,
1805                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1806         };
1807         unsigned int i;
1808         unsigned int max_num, num, max_buf;
1809         LOCKING_ANDX_RANGE *buf, *cur;
1810         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1811         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1812         struct cifsLockInfo *li, *tmp;
1813         __u64 length = cifs_flock_len(flock);
1814         struct list_head tmp_llist;
1815
1816         INIT_LIST_HEAD(&tmp_llist);
1817
1818         /*
1819          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1820          * and check it before using.
1821          */
1822         max_buf = tcon->ses->server->maxBuf;
1823         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1824                 return -EINVAL;
1825
1826         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1827                      PAGE_SIZE);
1828         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1829                         PAGE_SIZE);
1830         max_num = (max_buf - sizeof(struct smb_hdr)) /
1831                                                 sizeof(LOCKING_ANDX_RANGE);
1832         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1833         if (!buf)
1834                 return -ENOMEM;
1835
1836         cifs_down_write(&cinode->lock_sem);
1837         for (i = 0; i < 2; i++) {
1838                 cur = buf;
1839                 num = 0;
1840                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1841                         if (flock->fl_start > li->offset ||
1842                             (flock->fl_start + length) <
1843                             (li->offset + li->length))
1844                                 continue;
1845                         if (current->tgid != li->pid)
1846                                 continue;
1847                         if (types[i] != li->type)
1848                                 continue;
1849                         if (cinode->can_cache_brlcks) {
1850                                 /*
1851                                  * We can cache brlock requests - simply remove
1852                                  * a lock from the file's list.
1853                                  */
1854                                 list_del(&li->llist);
1855                                 cifs_del_lock_waiters(li);
1856                                 kfree(li);
1857                                 continue;
1858                         }
1859                         cur->Pid = cpu_to_le16(li->pid);
1860                         cur->LengthLow = cpu_to_le32((u32)li->length);
1861                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1862                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1863                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1864                         /*
1865                          * We need to save the lock here so that we can add it
1866                          * back to the file's list if the unlock range request
1867                          * fails on the server.
1868                          */
1869                         list_move(&li->llist, &tmp_llist);
1870                         if (++num == max_num) {
1871                                 stored_rc = cifs_lockv(xid, tcon,
1872                                                        cfile->fid.netfid,
1873                                                        li->type, num, 0, buf);
1874                                 if (stored_rc) {
1875                                         /*
1876                                          * We failed on the unlock range
1877                                          * request - add all locks from the tmp
1878                                          * list to the head of the file's list.
1879                                          */
1880                                         cifs_move_llist(&tmp_llist,
1881                                                         &cfile->llist->locks);
1882                                         rc = stored_rc;
1883                                 } else
1884                                         /*
1885                                          * The unlock range request succeed -
1886                                          * free the tmp list.
1887                                          */
1888                                         cifs_free_llist(&tmp_llist);
1889                                 cur = buf;
1890                                 num = 0;
1891                         } else
1892                                 cur++;
1893                 }
1894                 if (num) {
1895                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1896                                                types[i], num, 0, buf);
1897                         if (stored_rc) {
1898                                 cifs_move_llist(&tmp_llist,
1899                                                 &cfile->llist->locks);
1900                                 rc = stored_rc;
1901                         } else
1902                                 cifs_free_llist(&tmp_llist);
1903                 }
1904         }
1905
1906         up_write(&cinode->lock_sem);
1907         kfree(buf);
1908         return rc;
1909 }
1910 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1911
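/*
 * Handle a lock or unlock request: use POSIX semantics when the server
 * supports them, otherwise cache the lock locally or send a mandatory
 * style request, and finally record the result with the VFS lock
 * bookkeeping for POSIX and flock style requests.
 */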
1912 static int
1913 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1914            bool wait_flag, bool posix_lck, int lock, int unlock,
1915            unsigned int xid)
1916 {
1917         int rc = 0;
1918         __u64 length = cifs_flock_len(flock);
1919         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1920         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1921         struct TCP_Server_Info *server = tcon->ses->server;
1922         struct inode *inode = d_inode(cfile->dentry);
1923
1924 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1925         if (posix_lck) {
1926                 int posix_lock_type;
1927
1928                 rc = cifs_posix_lock_set(file, flock);
1929                 if (rc <= FILE_LOCK_DEFERRED)
1930                         return rc;
1931
1932                 if (type & server->vals->shared_lock_type)
1933                         posix_lock_type = CIFS_RDLCK;
1934                 else
1935                         posix_lock_type = CIFS_WRLCK;
1936
1937                 if (unlock == 1)
1938                         posix_lock_type = CIFS_UNLCK;
1939
1940                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1941                                       hash_lockowner(flock->fl_owner),
1942                                       flock->fl_start, length,
1943                                       NULL, posix_lock_type, wait_flag);
1944                 goto out;
1945         }
1946 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1947         if (lock) {
1948                 struct cifsLockInfo *lock;
1949
1950                 lock = cifs_lock_init(flock->fl_start, length, type,
1951                                       flock->fl_flags);
1952                 if (!lock)
1953                         return -ENOMEM;
1954
1955                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1956                 if (rc < 0) {
1957                         kfree(lock);
1958                         return rc;
1959                 }
1960                 if (!rc)
1961                         goto out;
1962
1963                 /*
1964                  * A Windows 7 server can delay breaking a lease from read to
1965                  * None if we set a byte-range lock on a file - break it
1966                  * explicitly before sending the lock to the server to be sure
1967                  * the next read won't conflict with non-overlapping locks due
1968                  * to page reading.
1969                  */
1970                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1971                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1972                         cifs_zap_mapping(inode);
1973                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1974                                  inode);
1975                         CIFS_I(inode)->oplock = 0;
1976                 }
1977
1978                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1979                                             type, 1, 0, wait_flag);
1980                 if (rc) {
1981                         kfree(lock);
1982                         return rc;
1983                 }
1984
1985                 cifs_lock_add(cfile, lock);
1986         } else if (unlock)
1987                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1988
1989 out:
1990         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1991                 /*
1992                  * If this is a request to remove all locks because we
1993                  * are closing the file, it doesn't matter if the
1994                  * unlocking failed as both cifs.ko and the SMB server
1995                  * remove the lock on file close
1996                  */
1997                 if (rc) {
1998                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1999                         if (!(flock->fl_flags & FL_CLOSE))
2000                                 return rc;
2001                 }
2002                 rc = locks_lock_file_wait(file, flock);
2003         }
2004         return rc;
2005 }
2006
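/*
 * flock(2) entry point: only FL_FLOCK requests are handled here; they
 * are mapped onto the same lock-setting path as fcntl locks.
 */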
2007 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2008 {
2009         int rc, xid;
2010         int lock = 0, unlock = 0;
2011         bool wait_flag = false;
2012         bool posix_lck = false;
2013         struct cifs_sb_info *cifs_sb;
2014         struct cifs_tcon *tcon;
2015         struct cifsFileInfo *cfile;
2016         __u32 type;
2017
2018         xid = get_xid();
2019
2020         if (!(fl->fl_flags & FL_FLOCK)) {
2021                 rc = -ENOLCK;
2022                 free_xid(xid);
2023                 return rc;
2024         }
2025
2026         cfile = (struct cifsFileInfo *)file->private_data;
2027         tcon = tlink_tcon(cfile->tlink);
2028
2029         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2030                         tcon->ses->server);
2031         cifs_sb = CIFS_FILE_SB(file);
2032
2033         if (cap_unix(tcon->ses) &&
2034             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2035             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2036                 posix_lck = true;
2037
2038         if (!lock && !unlock) {
2039                 /*
2040                  * neither a lock nor an unlock was requested, so there is
2041                  * nothing to do since we do not know what was intended
2042                  */
2043                 rc = -EOPNOTSUPP;
2044                 free_xid(xid);
2045                 return rc;
2046         }
2047
2048         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2049                         xid);
2050         free_xid(xid);
2051         return rc;
2054 }
2055
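/*
 * fcntl(2) byte-range lock entry point, reached via the file_operations
 * ->lock hook and handling F_GETLK, F_SETLK and F_SETLKW.
 */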
2056 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2057 {
2058         int rc, xid;
2059         int lock = 0, unlock = 0;
2060         bool wait_flag = false;
2061         bool posix_lck = false;
2062         struct cifs_sb_info *cifs_sb;
2063         struct cifs_tcon *tcon;
2064         struct cifsFileInfo *cfile;
2065         __u32 type;
2066
2067         rc = -EACCES;
2068         xid = get_xid();
2069
2070         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
2071                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2072                  (long long)flock->fl_end);
2073
2074         cfile = (struct cifsFileInfo *)file->private_data;
2075         tcon = tlink_tcon(cfile->tlink);
2076
2077         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2078                         tcon->ses->server);
2079         cifs_sb = CIFS_FILE_SB(file);
2080         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2081
2082         if (cap_unix(tcon->ses) &&
2083             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2084             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2085                 posix_lck = true;
2086         /*
2087          * BB add code here to normalize offset and length to account for
2088          * negative length, which we cannot accept over the wire.
2089          */
2090         if (IS_GETLK(cmd)) {
2091                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2092                 free_xid(xid);
2093                 return rc;
2094         }
2095
2096         if (!lock && !unlock) {
2097                 /*
2098                  * neither a lock nor an unlock was requested, so there is
2099                  * nothing to do since we do not know what was intended
2100                  */
2101                 free_xid(xid);
2102                 return -EOPNOTSUPP;
2103         }
2104
2105         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2106                         xid);
2107         free_xid(xid);
2108         return rc;
2109 }
2110
2111 /*
2112  * Update the file size (if needed) after a write. Must be called with
2113  * inode->i_lock held.
2114  */
2115 void
2116 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2117                       unsigned int bytes_written)
2118 {
2119         loff_t end_of_write = offset + bytes_written;
2120
2121         if (end_of_write > cifsi->server_eof)
2122                 cifsi->server_eof = end_of_write;
2123 }
2124
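/*
 * Synchronously write @write_size bytes at *@offset through an open
 * handle, reopening an invalidated handle and retrying on -EAGAIN.
 * Updates the cached server EOF and i_size as bytes land and advances
 * *@offset. Returns the number of bytes written, or a negative error if
 * nothing could be written. E.g. cifs_partialpagewrite() below maps a
 * page and calls cifs_write(open_file, open_file->pid, write_data,
 * to - from, &offset) to flush part of that page.
 */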
2125 static ssize_t
2126 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2127            size_t write_size, loff_t *offset)
2128 {
2129         int rc = 0;
2130         unsigned int bytes_written = 0;
2131         unsigned int total_written;
2132         struct cifs_tcon *tcon;
2133         struct TCP_Server_Info *server;
2134         unsigned int xid;
2135         struct dentry *dentry = open_file->dentry;
2136         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2137         struct cifs_io_parms io_parms = {0};
2138
2139         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2140                  write_size, *offset, dentry);
2141
2142         tcon = tlink_tcon(open_file->tlink);
2143         server = tcon->ses->server;
2144
2145         if (!server->ops->sync_write)
2146                 return -ENOSYS;
2147
2148         xid = get_xid();
2149
2150         for (total_written = 0; write_size > total_written;
2151              total_written += bytes_written) {
2152                 rc = -EAGAIN;
2153                 while (rc == -EAGAIN) {
2154                         struct kvec iov[2];
2155                         unsigned int len;
2156
2157                         if (open_file->invalidHandle) {
2158                                 /* we could deadlock if we called
2159                                    filemap_fdatawait from here, so tell
2160                                    cifs_reopen_file not to flush data to
2161                                    the server now */
2162                                 rc = cifs_reopen_file(open_file, false);
2163                                 if (rc != 0)
2164                                         break;
2165                         }
2166
2167                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2168                                   (unsigned int)write_size - total_written);
2169                         /* iov[0] is reserved for smb header */
2170                         iov[1].iov_base = (char *)write_data + total_written;
2171                         iov[1].iov_len = len;
2172                         io_parms.pid = pid;
2173                         io_parms.tcon = tcon;
2174                         io_parms.offset = *offset;
2175                         io_parms.length = len;
2176                         rc = server->ops->sync_write(xid, &open_file->fid,
2177                                         &io_parms, &bytes_written, iov, 1);
2178                 }
2179                 if (rc || (bytes_written == 0)) {
2180                         if (total_written)
2181                                 break;
2182                         else {
2183                                 free_xid(xid);
2184                                 return rc;
2185                         }
2186                 } else {
2187                         spin_lock(&d_inode(dentry)->i_lock);
2188                         cifs_update_eof(cifsi, *offset, bytes_written);
2189                         spin_unlock(&d_inode(dentry)->i_lock);
2190                         *offset += bytes_written;
2191                 }
2192         }
2193
2194         cifs_stats_bytes_written(tcon, total_written);
2195
2196         if (total_written > 0) {
2197                 spin_lock(&d_inode(dentry)->i_lock);
2198                 if (*offset > d_inode(dentry)->i_size) {
2199                         i_size_write(d_inode(dentry), *offset);
2200                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2201                 }
2202                 spin_unlock(&d_inode(dentry)->i_lock);
2203         }
2204         mark_inode_dirty_sync(d_inode(dentry));
2205         free_xid(xid);
2206         return total_written;
2207 }
2208
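/*
 * Find a valid, readable open handle on this inode, optionally matching
 * the caller's fsuid, and take a reference on it. Returns NULL if none
 * is found.
 */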
2209 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2210                                         bool fsuid_only)
2211 {
2212         struct cifsFileInfo *open_file = NULL;
2213         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2214
2215         /* only filter by fsuid on multiuser mounts */
2216         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2217                 fsuid_only = false;
2218
2219         spin_lock(&cifs_inode->open_file_lock);
2220         /* we could simply take the first list entry since write-only entries
2221            are always at the end of the list, but since the first entry might
2222            have a close pending, we go through the whole list */
2223         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2224                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2225                         continue;
2226                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2227                         if (!open_file->invalidHandle) {
2228                                 /* found a good file */
2229                                 /* lock it so it will not be closed on us */
2230                                 cifsFileInfo_get(open_file);
2231                                 spin_unlock(&cifs_inode->open_file_lock);
2232                                 return open_file;
2233                         } /* else might as well continue, and look for
2234                              another, or simply have the caller reopen it
2235                              again rather than trying to fix this handle */
2236                 } else /* write only file */
2237                         break; /* write only files are last so must be done */
2238         }
2239         spin_unlock(&cifs_inode->open_file_lock);
2240         return NULL;
2241 }
2242
2243 /* Return -EBADF if no handle is found and general rc otherwise */
2244 int
2245 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2246                        struct cifsFileInfo **ret_file)
2247 {
2248         struct cifsFileInfo *open_file, *inv_file = NULL;
2249         struct cifs_sb_info *cifs_sb;
2250         bool any_available = false;
2251         int rc = -EBADF;
2252         unsigned int refind = 0;
2253         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2254         bool with_delete = flags & FIND_WR_WITH_DELETE;
2255         *ret_file = NULL;
2256
2257         /*
2258          * Having a null inode here (because mapping->host was set to zero by
2259          * the VFS or MM) should not happen but we had reports of an oops (due
2260          * to it being zero) during stress testcases, so we need to check for it
2261          */
2262
2263         if (cifs_inode == NULL) {
2264                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2265                 dump_stack();
2266                 return rc;
2267         }
2268
2269         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2270
2271         /* only filter by fsuid on multiuser mounts */
2272         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2273                 fsuid_only = false;
2274
2275         spin_lock(&cifs_inode->open_file_lock);
2276 refind_writable:
2277         if (refind > MAX_REOPEN_ATT) {
2278                 spin_unlock(&cifs_inode->open_file_lock);
2279                 return rc;
2280         }
2281         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2282                 if (!any_available && open_file->pid != current->tgid)
2283                         continue;
2284                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2285                         continue;
2286                 if (with_delete && !(open_file->fid.access & DELETE))
2287                         continue;
2288                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2289                         if (!open_file->invalidHandle) {
2290                                 /* found a good writable file */
2291                                 cifsFileInfo_get(open_file);
2292                                 spin_unlock(&cifs_inode->open_file_lock);
2293                                 *ret_file = open_file;
2294                                 return 0;
2295                         } else {
2296                                 if (!inv_file)
2297                                         inv_file = open_file;
2298                         }
2299                 }
2300         }
2301         /* couldn't find a usable FH with the same pid, try any available */
2302         if (!any_available) {
2303                 any_available = true;
2304                 goto refind_writable;
2305         }
2306
2307         if (inv_file) {
2308                 any_available = false;
2309                 cifsFileInfo_get(inv_file);
2310         }
2311
2312         spin_unlock(&cifs_inode->open_file_lock);
2313
2314         if (inv_file) {
2315                 rc = cifs_reopen_file(inv_file, false);
2316                 if (!rc) {
2317                         *ret_file = inv_file;
2318                         return 0;
2319                 }
2320
2321                 spin_lock(&cifs_inode->open_file_lock);
2322                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2323                 spin_unlock(&cifs_inode->open_file_lock);
2324                 cifsFileInfo_put(inv_file);
2325                 ++refind;
2326                 inv_file = NULL;
2327                 spin_lock(&cifs_inode->open_file_lock);
2328                 goto refind_writable;
2329         }
2330
2331         return rc;
2332 }
2333
2334 struct cifsFileInfo *
2335 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2336 {
2337         struct cifsFileInfo *cfile;
2338         int rc;
2339
2340         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2341         if (rc)
2342                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2343
2344         return cfile;
2345 }
2346
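/*
 * Find an open file on this tcon whose dentry path matches @name and get
 * a writable handle on its inode.
 */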
2347 int
2348 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2349                        int flags,
2350                        struct cifsFileInfo **ret_file)
2351 {
2352         struct cifsFileInfo *cfile;
2353         void *page = alloc_dentry_path();
2354
2355         *ret_file = NULL;
2356
2357         spin_lock(&tcon->open_file_lock);
2358         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2359                 struct cifsInodeInfo *cinode;
2360                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2361                 if (IS_ERR(full_path)) {
2362                         spin_unlock(&tcon->open_file_lock);
2363                         free_dentry_path(page);
2364                         return PTR_ERR(full_path);
2365                 }
2366                 if (strcmp(full_path, name))
2367                         continue;
2368
2369                 cinode = CIFS_I(d_inode(cfile->dentry));
2370                 spin_unlock(&tcon->open_file_lock);
2371                 free_dentry_path(page);
2372                 return cifs_get_writable_file(cinode, flags, ret_file);
2373         }
2374
2375         spin_unlock(&tcon->open_file_lock);
2376         free_dentry_path(page);
2377         return -ENOENT;
2378 }
2379
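/*
 * As above, but look up a readable handle for the given path.
 */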
2380 int
2381 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2382                        struct cifsFileInfo **ret_file)
2383 {
2384         struct cifsFileInfo *cfile;
2385         void *page = alloc_dentry_path();
2386
2387         *ret_file = NULL;
2388
2389         spin_lock(&tcon->open_file_lock);
2390         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2391                 struct cifsInodeInfo *cinode;
2392                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2393                 if (IS_ERR(full_path)) {
2394                         spin_unlock(&tcon->open_file_lock);
2395                         free_dentry_path(page);
2396                         return PTR_ERR(full_path);
2397                 }
2398                 if (strcmp(full_path, name))
2399                         continue;
2400
2401                 cinode = CIFS_I(d_inode(cfile->dentry));
2402                 spin_unlock(&tcon->open_file_lock);
2403                 free_dentry_path(page);
2404                 *ret_file = find_readable_file(cinode, 0);
2405                 return *ret_file ? 0 : -ENOENT;
2406         }
2407
2408         spin_unlock(&tcon->open_file_lock);
2409         free_dentry_path(page);
2410         return -ENOENT;
2411 }
2412
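/*
 * Last-reference destructor for a cifs_writedata: deregister any
 * SMB Direct memory registration, drop the open-file reference and free
 * the structure.
 */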
2413 void
2414 cifs_writedata_release(struct kref *refcount)
2415 {
2416         struct cifs_writedata *wdata = container_of(refcount,
2417                                         struct cifs_writedata, refcount);
2418 #ifdef CONFIG_CIFS_SMB_DIRECT
2419         if (wdata->mr) {
2420                 smbd_deregister_mr(wdata->mr);
2421                 wdata->mr = NULL;
2422         }
2423 #endif
2424
2425         if (wdata->cfile)
2426                 cifsFileInfo_put(wdata->cfile);
2427
2428         kfree(wdata);
2429 }
2430
2431 /*
2432  * Write failed with a retryable error. Resend the write request. It's also
2433  * possible that the page was redirtied, so re-clean the page.
2434  */
2435 static void
2436 cifs_writev_requeue(struct cifs_writedata *wdata)
2437 {
2438         int rc = 0;
2439         struct inode *inode = d_inode(wdata->cfile->dentry);
2440         struct TCP_Server_Info *server;
2441         unsigned int rest_len = wdata->bytes;
2442         loff_t fpos = wdata->offset;
2443
2444         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2445         do {
2446                 struct cifs_writedata *wdata2;
2447                 unsigned int wsize, cur_len;
2448
2449                 wsize = server->ops->wp_retry_size(inode);
2450                 if (wsize < rest_len) {
2451                         if (wsize < PAGE_SIZE) {
2452                                 rc = -EOPNOTSUPP;
2453                                 break;
2454                         }
2455                         cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2456                 } else {
2457                         cur_len = rest_len;
2458                 }
2459
2460                 wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2461                 if (!wdata2) {
2462                         rc = -ENOMEM;
2463                         break;
2464                 }
2465
2466                 wdata2->sync_mode = wdata->sync_mode;
2467                 wdata2->offset  = fpos;
2468                 wdata2->bytes   = cur_len;
2469                 wdata2->iter    = wdata->iter;
2470
2471                 iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2472                 iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2473
2474                 if (iov_iter_is_xarray(&wdata2->iter))
2475                         /* Check for pages having been redirtied and clean
2476                          * them.  We can do this by walking the xarray.  If
2477                          * it's not an xarray, then it's a DIO and we shouldn't
2478                          * be mucking around with the page bits.
2479                          */
2480                         cifs_undirty_folios(inode, fpos, cur_len);
2481
2482                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2483                                             &wdata2->cfile);
2484                 if (!wdata2->cfile) {
2485                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2486                                  rc);
2487                         if (!is_retryable_error(rc))
2488                                 rc = -EBADF;
2489                 } else {
2490                         wdata2->pid = wdata2->cfile->pid;
2491                         rc = server->ops->async_writev(wdata2,
2492                                                        cifs_writedata_release);
2493                 }
2494
2495                 kref_put(&wdata2->refcount, cifs_writedata_release);
2496                 if (rc) {
2497                         if (is_retryable_error(rc))
2498                                 continue;
2499                         fpos += cur_len;
2500                         rest_len -= cur_len;
2501                         break;
2502                 }
2503
2504                 fpos += cur_len;
2505                 rest_len -= cur_len;
2506         } while (rest_len > 0);
2507
2508         /* Clean up remaining pages from the original wdata */
2509         if (iov_iter_is_xarray(&wdata->iter))
2510                 cifs_pages_write_failed(inode, fpos, rest_len);
2511
2512         if (rc != 0 && !is_retryable_error(rc))
2513                 mapping_set_error(inode->i_mapping, rc);
2514         kref_put(&wdata->refcount, cifs_writedata_release);
2515 }
2516
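/*
 * Work item run when an asynchronous write completes: on success update
 * the cached EOF and write statistics; on a retryable -EAGAIN failure
 * requeue (for WB_SYNC_ALL) or redirty the pages; otherwise mark the
 * affected pages written back or failed and record the error on the
 * mapping.
 */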
2517 void
2518 cifs_writev_complete(struct work_struct *work)
2519 {
2520         struct cifs_writedata *wdata = container_of(work,
2521                                                 struct cifs_writedata, work);
2522         struct inode *inode = d_inode(wdata->cfile->dentry);
2523
2524         if (wdata->result == 0) {
2525                 spin_lock(&inode->i_lock);
2526                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2527                 spin_unlock(&inode->i_lock);
2528                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2529                                          wdata->bytes);
2530         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2531                 return cifs_writev_requeue(wdata);
2532
2533         if (wdata->result == -EAGAIN)
2534                 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2535         else if (wdata->result < 0)
2536                 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2537         else
2538                 cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2539
2540         if (wdata->result != -EAGAIN)
2541                 mapping_set_error(inode->i_mapping, wdata->result);
2542         kref_put(&wdata->refcount, cifs_writedata_release);
2543 }
2544
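/*
 * Allocate a zeroed cifs_writedata and initialise its refcount, list
 * head, completion and work item.
 */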
2545 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2546 {
2547         struct cifs_writedata *wdata;
2548
2549         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2550         if (wdata != NULL) {
2551                 kref_init(&wdata->refcount);
2552                 INIT_LIST_HEAD(&wdata->list);
2553                 init_completion(&wdata->done);
2554                 INIT_WORK(&wdata->work, complete);
2555         }
2556         return wdata;
2557 }
2558
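/*
 * Synchronously write the bytes [from, to) of a single page back to the
 * server, after trimming the range so that it does not extend the file.
 */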
2559 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2560 {
2561         struct address_space *mapping = page->mapping;
2562         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2563         char *write_data;
2564         int rc = -EFAULT;
2565         int bytes_written = 0;
2566         struct inode *inode;
2567         struct cifsFileInfo *open_file;
2568
2569         if (!mapping || !mapping->host)
2570                 return -EFAULT;
2571
2572         inode = page->mapping->host;
2573
2574         offset += (loff_t)from;
2575         write_data = kmap(page);
2576         write_data += from;
2577
2578         if ((to > PAGE_SIZE) || (from > to)) {
2579                 kunmap(page);
2580                 return -EIO;
2581         }
2582
2583         /* racing with truncate? */
2584         if (offset > mapping->host->i_size) {
2585                 kunmap(page);
2586                 return 0; /* don't care */
2587         }
2588
2589         /* check to make sure that we are not extending the file */
2590         if (mapping->host->i_size - offset < (loff_t)to)
2591                 to = (unsigned)(mapping->host->i_size - offset);
2592
2593         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2594                                     &open_file);
2595         if (!rc) {
2596                 bytes_written = cifs_write(open_file, open_file->pid,
2597                                            write_data, to - from, &offset);
2598                 cifsFileInfo_put(open_file);
2599                 /* Does mm or vfs already set times? */
2600                 simple_inode_init_ts(inode);
2601                 if ((bytes_written > 0) && (offset))
2602                         rc = 0;
2603                 else if (bytes_written < 0)
2604                         rc = bytes_written;
2605                 else
2606                         rc = -EFAULT;
2607         } else {
2608                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2609                 if (!is_retryable_error(rc))
2610                         rc = -EIO;
2611         }
2612
2613         kunmap(page);
2614         return rc;
2615 }
2616
2617 /*
2618  * Extend the region to be written back to include subsequent contiguously
2619  * dirty pages if possible, but don't sleep while doing so.
2620  */
2621 static void cifs_extend_writeback(struct address_space *mapping,
2622                                   long *_count,
2623                                   loff_t start,
2624                                   int max_pages,
2625                                   size_t max_len,
2626                                   unsigned int *_len)
2627 {
2628         struct folio_batch batch;
2629         struct folio *folio;
2630         unsigned int psize, nr_pages;
2631         size_t len = *_len;
2632         pgoff_t index = (start + len) / PAGE_SIZE;
2633         bool stop = true;
2634         unsigned int i;
2635         XA_STATE(xas, &mapping->i_pages, index);
2636
2637         folio_batch_init(&batch);
2638
2639         do {
2640                 /* Firstly, we gather up a batch of contiguous dirty pages
2641                  * under the RCU read lock - but we can't clear the dirty flags
2642                  * there if any of those pages are mapped.
2643                  */
2644                 rcu_read_lock();
2645
2646                 xas_for_each(&xas, folio, ULONG_MAX) {
2647                         stop = true;
2648                         if (xas_retry(&xas, folio))
2649                                 continue;
2650                         if (xa_is_value(folio))
2651                                 break;
2652                         if (folio_index(folio) != index)
2653                                 break;
2654                         if (!folio_try_get_rcu(folio)) {
2655                                 xas_reset(&xas);
2656                                 continue;
2657                         }
2658                         nr_pages = folio_nr_pages(folio);
2659                         if (nr_pages > max_pages)
2660                                 break;
2661
2662                         /* Has the page moved or been split? */
2663                         if (unlikely(folio != xas_reload(&xas))) {
2664                                 folio_put(folio);
2665                                 break;
2666                         }
2667
2668                         if (!folio_trylock(folio)) {
2669                                 folio_put(folio);
2670                                 break;
2671                         }
2672                         if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
2673                                 folio_unlock(folio);
2674                                 folio_put(folio);
2675                                 break;
2676                         }
2677
2678                         max_pages -= nr_pages;
2679                         psize = folio_size(folio);
2680                         len += psize;
2681                         stop = false;
2682                         if (max_pages <= 0 || len >= max_len || *_count <= 0)
2683                                 stop = true;
2684
2685                         index += nr_pages;
2686                         if (!folio_batch_add(&batch, folio))
2687                                 break;
2688                         if (stop)
2689                                 break;
2690                 }
2691
2692                 if (!stop)
2693                         xas_pause(&xas);
2694                 rcu_read_unlock();
2695
2696                 /* Now, if we obtained any pages, we can shift them to being
2697                  * writable and mark them for caching.
2698                  */
2699                 if (!folio_batch_count(&batch))
2700                         break;
2701
2702                 for (i = 0; i < folio_batch_count(&batch); i++) {
2703                         folio = batch.folios[i];
2704                         /* The folio should be locked, dirty and not undergoing
2705                          * writeback from the loop above.
2706                          */
2707                         if (!folio_clear_dirty_for_io(folio))
2708                                 WARN_ON(1);
2709                         if (folio_start_writeback(folio))
2710                                 WARN_ON(1);
2711
2712                         *_count -= folio_nr_pages(folio);
2713                         folio_unlock(folio);
2714                 }
2715
2716                 folio_batch_release(&batch);
2717                 cond_resched();
2718         } while (!stop);
2719
2720         *_len = len;
2721 }
2722
2723 /*
2724  * Write back the locked page and any subsequent non-locked dirty pages.
2725  */
2726 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2727                                                  struct writeback_control *wbc,
2728                                                  struct folio *folio,
2729                                                  loff_t start, loff_t end)
2730 {
2731         struct inode *inode = mapping->host;
2732         struct TCP_Server_Info *server;
2733         struct cifs_writedata *wdata;
2734         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2735         struct cifs_credits credits_on_stack;
2736         struct cifs_credits *credits = &credits_on_stack;
2737         struct cifsFileInfo *cfile = NULL;
2738         unsigned int xid, wsize, len;
2739         loff_t i_size = i_size_read(inode);
2740         size_t max_len;
2741         long count = wbc->nr_to_write;
2742         int rc;
2743
2744         /* The folio should be locked, dirty and not undergoing writeback. */
2745         if (folio_start_writeback(folio))
2746                 WARN_ON(1);
2747
2748         count -= folio_nr_pages(folio);
2749         len = folio_size(folio);
2750
2751         xid = get_xid();
2752         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2753
2754         rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2755         if (rc) {
2756                 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2757                 goto err_xid;
2758         }
2759
2760         rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2761                                            &wsize, credits);
2762         if (rc != 0)
2763                 goto err_close;
2764
2765         wdata = cifs_writedata_alloc(cifs_writev_complete);
2766         if (!wdata) {
2767                 rc = -ENOMEM;
2768                 goto err_uncredit;
2769         }
2770
2771         wdata->sync_mode = wbc->sync_mode;
2772         wdata->offset = folio_pos(folio);
2773         wdata->pid = cfile->pid;
2774         wdata->credits = credits_on_stack;
2775         wdata->cfile = cfile;
2776         wdata->server = server;
2777         cfile = NULL;
2778
2779         /* Find all consecutive lockable dirty pages, stopping when we find a
2780          * page that is not immediately lockable, is not dirty or is missing,
2781          * or we reach the end of the range.
2782          */
2783         if (start < i_size) {
2784                 /* Trim the write to the EOF; the extra data is ignored.  Also
2785                  * put an upper limit on the size of a single storedata op.
2786                  */
2787                 max_len = wsize;
2788                 max_len = min_t(unsigned long long, max_len, end - start + 1);
2789                 max_len = min_t(unsigned long long, max_len, i_size - start);
2790
2791                 if (len < max_len) {
2792                         int max_pages = INT_MAX;
2793
2794 #ifdef CONFIG_CIFS_SMB_DIRECT
2795                         if (server->smbd_conn)
2796                                 max_pages = server->smbd_conn->max_frmr_depth;
2797 #endif
2798                         max_pages -= folio_nr_pages(folio);
2799
2800                         if (max_pages > 0)
2801                                 cifs_extend_writeback(mapping, &count, start,
2802                                                       max_pages, max_len, &len);
2803                 }
2804                 len = min_t(loff_t, len, max_len);
2805         }
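
        /*
         * Worked example with hypothetical numbers: with wsize = 64KiB, a
         * requested range [start, end] spanning 1MiB and only 10KiB of file
         * data left before EOF, the clamps above give
         *
         *	max_len = min(64KiB, 1MiB, 10KiB) = 10KiB
         *
         * so this single storedata op carries at most 10KiB, and len is
         * then capped to the same bound.
         */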
2806
2807         wdata->bytes = len;
2808
2809         /* We now have a contiguous set of dirty pages, each with writeback
2810          * set; the first page is still locked at this point, but all the rest
2811          * have been unlocked.
2812          */
2813         folio_unlock(folio);
2814
2815         if (start < i_size) {
2816                 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2817                                 start, len);
2818
2819                 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2820                 if (rc)
2821                         goto err_wdata;
2822
2823                 if (wdata->cfile->invalidHandle)
2824                         rc = -EAGAIN;
2825                 else
2826                         rc = wdata->server->ops->async_writev(wdata,
2827                                                               cifs_writedata_release);
2828                 if (rc >= 0) {
2829                         kref_put(&wdata->refcount, cifs_writedata_release);
2830                         goto err_close;
2831                 }
2832         } else {
2833                 /* The dirty region was entirely beyond the EOF. */
2834                 cifs_pages_written_back(inode, start, len);
2835                 rc = 0;
2836         }
2837
2838 err_wdata:
2839         kref_put(&wdata->refcount, cifs_writedata_release);
2840 err_uncredit:
2841         add_credits_and_wake_if(server, credits, 0);
2842 err_close:
2843         if (cfile)
2844                 cifsFileInfo_put(cfile);
2845 err_xid:
2846         free_xid(xid);
2847         if (rc == 0) {
2848                 wbc->nr_to_write = count;
2849                 rc = len;
2850         } else if (is_retryable_error(rc)) {
2851                 cifs_pages_write_redirty(inode, start, len);
2852         } else {
2853                 cifs_pages_write_failed(inode, start, len);
2854                 mapping_set_error(mapping, rc);
2855         }
2856         /* Indication to update ctime and mtime as close is deferred */
2857         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2858         return rc;
2859 }
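
/*
 * A minimal sketch (hypothetical caller, assumptions as in this file) of the
 * return convention implemented above: a positive return is the number of
 * bytes handed to the server, a retryable error leaves the pages dirty for a
 * later pass, and a hard error fails the pages and latches the error on the
 * mapping for fsync()/close() to report:
 *
 *	ssize_t ret = cifs_write_back_from_locked_folio(mapping, wbc,
 *							folio, start, end);
 *	if (ret > 0)
 *		start += ret;		// advance past what was written
 *	else
 *		return ret;		// pages already redirtied or failed
 */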
2860
2861 /*
2862  * Write a region of pages back to the server.
2863  */
2864 static int cifs_writepages_region(struct address_space *mapping,
2865                                   struct writeback_control *wbc,
2866                                   loff_t start, loff_t end, loff_t *_next)
2867 {
2868         struct folio_batch fbatch;
2869         int skips = 0;
2870
2871         folio_batch_init(&fbatch);
2872         do {
2873                 int nr;
2874                 pgoff_t index = start / PAGE_SIZE;
2875
2876                 nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
2877                                             PAGECACHE_TAG_DIRTY, &fbatch);
2878                 if (!nr)
2879                         break;
2880
2881                 for (int i = 0; i < nr; i++) {
2882                         ssize_t ret;
2883                         struct folio *folio = fbatch.folios[i];
2884
2885 redo_folio:
2886                         start = folio_pos(folio); /* May regress with THPs */
2887
2888                         /* At this point we hold neither the i_pages lock nor the
2889                          * page lock: the page may be truncated or invalidated
2890                          * (changing page->mapping to NULL), or even swizzled
2891                          * back from swapper_space to tmpfs file mapping
2892                          */
2893                         if (wbc->sync_mode != WB_SYNC_NONE) {
2894                                 ret = folio_lock_killable(folio);
2895                                 if (ret < 0)
2896                                         goto write_error;
2897                         } else {
2898                                 if (!folio_trylock(folio))
2899                                         goto skip_write;
2900                         }
2901
2902                         if (folio_mapping(folio) != mapping ||
2903                             !folio_test_dirty(folio)) {
2904                                 start += folio_size(folio);
2905                                 folio_unlock(folio);
2906                                 continue;
2907                         }
2908
2909                         if (folio_test_writeback(folio) ||
2910                             folio_test_fscache(folio)) {
2911                                 folio_unlock(folio);
2912                                 if (wbc->sync_mode == WB_SYNC_NONE)
2913                                         goto skip_write;
2914
2915                                 folio_wait_writeback(folio);
2916 #ifdef CONFIG_CIFS_FSCACHE
2917                                 folio_wait_fscache(folio);
2918 #endif
2919                                 goto redo_folio;
2920                         }
2921
2922                         if (!folio_clear_dirty_for_io(folio))
2923                                 /* We hold the page lock - it should've been dirty. */
2924                                 WARN_ON(1);
2925
2926                         ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
2927                         if (ret < 0)
2928                                 goto write_error;
2929
2930                         start += ret;
2931                         continue;
2932
2933 write_error:
2934                         folio_batch_release(&fbatch);
2935                         *_next = start;
2936                         return ret;
2937
2938 skip_write:
2939                         /*
2940                          * Too many skipped writes, or need to reschedule?
2941                          * Treat it as a write error without an error code.
2942                          */
2943                         if (skips >= 5 || need_resched()) {
2944                                 ret = 0;
2945                                 goto write_error;
2946                         }
2947
2948                         /* Otherwise, just skip that folio and go on to the next */
2949                         skips++;
2950                         start += folio_size(folio);
2951                         continue;
2952                 }
2953
2954                 folio_batch_release(&fbatch);
2955                 cond_resched();
2956         } while (wbc->nr_to_write > 0);
2957
2958         *_next = start;
2959         return 0;
2960 }
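
/*
 * Locking policy used above, reduced to a sketch (illustrative only): data
 * integrity writeback (WB_SYNC_ALL) must not skip folios and so waits for
 * the lock, while background writeback gives up rather than stall:
 *
 *	if (wbc->sync_mode != WB_SYNC_NONE)
 *		ret = folio_lock_killable(folio); // -EINTR on a fatal signal
 *	else if (!folio_trylock(folio))
 *		goto skip_write;		  // busy - catch it next pass
 */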
2961
2962 /*
2963  * Write some of the pending data back to the server
2964  */
2965 static int cifs_writepages(struct address_space *mapping,
2966                            struct writeback_control *wbc)
2967 {
2968         loff_t start, next;
2969         int ret;
2970
2971         /* We have to be careful as we can end up racing with setattr()
2972          * truncating the pagecache since the caller doesn't take a lock here
2973          * to prevent it.
2974          */
2975
2976         if (wbc->range_cyclic) {
2977                 start = mapping->writeback_index * PAGE_SIZE;
2978                 ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
2979                 if (ret == 0) {
2980                         mapping->writeback_index = next / PAGE_SIZE;
2981                         if (start > 0 && wbc->nr_to_write > 0) {
2982                                 ret = cifs_writepages_region(mapping, wbc, 0,
2983                                                              start, &next);
2984                                 if (ret == 0)
2985                                         mapping->writeback_index =
2986                                                 next / PAGE_SIZE;
2987                         }
2988                 }
2989         } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
2990                 ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
2991                 if (wbc->nr_to_write > 0 && ret == 0)
2992                         mapping->writeback_index = next / PAGE_SIZE;
2993         } else {
2994                 ret = cifs_writepages_region(mapping, wbc,
2995                                              wbc->range_start, wbc->range_end, &next);
2996         }
2997
2998         return ret;
2999 }
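
/*
 * Illustrative flow of the range_cyclic case above, with hypothetical
 * numbers: if writeback_index is 100, the first pass covers pages from 100
 * to the end of the file, and any leftover budget funds a second pass over
 * pages [0, 100):
 *
 *	start = 100 * PAGE_SIZE;
 *	cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
 *	cifs_writepages_region(mapping, wbc, 0, start, &next);
 *
 * writeback_index is refreshed after each pass so the next cycle resumes
 * where this one stopped.
 */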
3000
3001 static int
3002 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3003 {
3004         int rc;
3005         unsigned int xid;
3006
3007         xid = get_xid();
3008 /* BB add check for wbc flags */
3009         get_page(page);
3010         if (!PageUptodate(page))
3011                 cifs_dbg(FYI, "ppw - page not up to date\n");
3012
3013         /*
3014          * Set the "writeback" flag, and clear "dirty" in the xarray.
3015          *
3016          * A writepage() implementation always needs to do either this,
3017          * or re-dirty the page with "redirty_page_for_writepage()" in
3018          * the case of a failure.
3019          *
3020          * Just unlocking the page would leave the xarray tag bits out of
3021          * sync with the true state of the page.
3022          */
3023         set_page_writeback(page);
3024 retry_write:
3025         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3026         if (is_retryable_error(rc)) {
3027                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3028                         goto retry_write;
3029                 redirty_page_for_writepage(wbc, page);
3030         } else if (rc != 0) {
3031                 SetPageError(page);
3032                 mapping_set_error(page->mapping, rc);
3033         } else {
3034                 SetPageUptodate(page);
3035         }
3036         end_page_writeback(page);
3037         put_page(page);
3038         free_xid(xid);
3039         return rc;
3040 }
3041
3042 static int cifs_write_end(struct file *file, struct address_space *mapping,
3043                         loff_t pos, unsigned len, unsigned copied,
3044                         struct page *page, void *fsdata)
3045 {
3046         int rc;
3047         struct inode *inode = mapping->host;
3048         struct cifsFileInfo *cfile = file->private_data;
3049         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3050         struct folio *folio = page_folio(page);
3051         __u32 pid;
3052
3053         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3054                 pid = cfile->pid;
3055         else
3056                 pid = current->tgid;
3057
3058         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3059                  page, pos, copied);
3060
3061         if (folio_test_checked(folio)) {
3062                 if (copied == len)
3063                         folio_mark_uptodate(folio);
3064                 folio_clear_checked(folio);
3065         } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3066                 folio_mark_uptodate(folio);
3067
3068         if (!folio_test_uptodate(folio)) {
3069                 char *page_data;
3070                 unsigned offset = pos & (PAGE_SIZE - 1);
3071                 unsigned int xid;
3072
3073                 xid = get_xid();
3074                 /* This is probably better than calling partialpage_write
3075                    directly, since here the file handle is already known
3076                    and we might as well make use of it */
3077                 /* BB check if anything else missing out of ppw
3078                    such as updating last write time */
3079                 page_data = kmap(page);
3080                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3081                 /* if (rc < 0) should we set writebehind rc? */
3082                 kunmap(page);
3083
3084                 free_xid(xid);
3085         } else {
3086                 rc = copied;
3087                 pos += copied;
3088                 set_page_dirty(page);
3089         }
3090
3091         if (rc > 0) {
3092                 spin_lock(&inode->i_lock);
3093                 if (pos > inode->i_size) {
3094                         i_size_write(inode, pos);
3095                         inode->i_blocks = (512 - 1 + pos) >> 9;
3096                 }
3097                 spin_unlock(&inode->i_lock);
3098         }
3099
3100         unlock_page(page);
3101         put_page(page);
3102         /* Indication to update ctime and mtime as close is deferred */
3103         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3104
3105         return rc;
3106 }
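
/*
 * Worked example for the i_blocks update above (hypothetical size): i_blocks
 * counts 512-byte sectors, and (512 - 1 + pos) >> 9 rounds the new size up
 * to whole sectors.  For pos = 1000:
 *
 *	(511 + 1000) >> 9 = 1511 >> 9 = 2	// two 512-byte sectors
 */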
3107
3108 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3109                       int datasync)
3110 {
3111         unsigned int xid;
3112         int rc = 0;
3113         struct cifs_tcon *tcon;
3114         struct TCP_Server_Info *server;
3115         struct cifsFileInfo *smbfile = file->private_data;
3116         struct inode *inode = file_inode(file);
3117         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3118
3119         rc = file_write_and_wait_range(file, start, end);
3120         if (rc) {
3121                 trace_cifs_fsync_err(inode->i_ino, rc);
3122                 return rc;
3123         }
3124
3125         xid = get_xid();
3126
3127         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3128                  file, datasync);
3129
3130         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3131                 rc = cifs_zap_mapping(inode);
3132                 if (rc) {
3133                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3134                         rc = 0; /* don't care about it in fsync */
3135                 }
3136         }
3137
3138         tcon = tlink_tcon(smbfile->tlink);
3139         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3140                 server = tcon->ses->server;
3141                 if (server->ops->flush == NULL) {
3142                         rc = -ENOSYS;
3143                         goto strict_fsync_exit;
3144                 }
3145
3146                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3147                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3148                         if (smbfile) {
3149                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3150                                 cifsFileInfo_put(smbfile);
3151                         } else
3152                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3153                 } else
3154                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3155         }
3156
3157 strict_fsync_exit:
3158         free_xid(xid);
3159         return rc;
3160 }
3161
3162 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3163 {
3164         unsigned int xid;
3165         int rc = 0;
3166         struct cifs_tcon *tcon;
3167         struct TCP_Server_Info *server;
3168         struct cifsFileInfo *smbfile = file->private_data;
3169         struct inode *inode = file_inode(file);
3170         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3171
3172         rc = file_write_and_wait_range(file, start, end);
3173         if (rc) {
3174                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3175                 return rc;
3176         }
3177
3178         xid = get_xid();
3179
3180         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3181                  file, datasync);
3182
3183         tcon = tlink_tcon(smbfile->tlink);
3184         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3185                 server = tcon->ses->server;
3186                 if (server->ops->flush == NULL) {
3187                         rc = -ENOSYS;
3188                         goto fsync_exit;
3189                 }
3190
3191                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3192                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3193                         if (smbfile) {
3194                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3195                                 cifsFileInfo_put(smbfile);
3196                         } else
3197                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3198                 } else
3199                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3200         }
3201
3202 fsync_exit:
3203         free_xid(xid);
3204         return rc;
3205 }
3206
3207 /*
3208  * As the file closes, flush all cached write data for this inode,
3209  * checking for write-behind errors.
3210  */
3211 int cifs_flush(struct file *file, fl_owner_t id)
3212 {
3213         struct inode *inode = file_inode(file);
3214         int rc = 0;
3215
3216         if (file->f_mode & FMODE_WRITE)
3217                 rc = filemap_write_and_wait(inode->i_mapping);
3218
3219         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3220         if (rc) {
3221                 /* get more nuanced writeback errors */
3222                 rc = filemap_check_wb_err(file->f_mapping, 0);
3223                 trace_cifs_flush_err(inode->i_ino, rc);
3224         }
3225         return rc;
3226 }
3227
3228 static void
3229 cifs_uncached_writedata_release(struct kref *refcount)
3230 {
3231         struct cifs_writedata *wdata = container_of(refcount,
3232                                         struct cifs_writedata, refcount);
3233
3234         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3235         cifs_writedata_release(refcount);
3236 }
3237
3238 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3239
3240 static void
3241 cifs_uncached_writev_complete(struct work_struct *work)
3242 {
3243         struct cifs_writedata *wdata = container_of(work,
3244                                         struct cifs_writedata, work);
3245         struct inode *inode = d_inode(wdata->cfile->dentry);
3246         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3247
3248         spin_lock(&inode->i_lock);
3249         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3250         if (cifsi->server_eof > inode->i_size)
3251                 i_size_write(inode, cifsi->server_eof);
3252         spin_unlock(&inode->i_lock);
3253
3254         complete(&wdata->done);
3255         collect_uncached_write_data(wdata->ctx);
3256         /* the below call can possibly free the last ref to aio ctx */
3257         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3258 }
3259
3260 static int
3261 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3262         struct cifs_aio_ctx *ctx)
3263 {
3264         unsigned int wsize;
3265         struct cifs_credits credits;
3266         int rc;
3267         struct TCP_Server_Info *server = wdata->server;
3268
3269         do {
3270                 if (wdata->cfile->invalidHandle) {
3271                         rc = cifs_reopen_file(wdata->cfile, false);
3272                         if (rc == -EAGAIN)
3273                                 continue;
3274                         else if (rc)
3275                                 break;
3276                 }
3277
3279                 /*
3280                  * Wait for credits to resend this wdata.
3281                  * Note: we are attempting to resend the whole wdata, not
3282                  * in segments.
3283                  */
3284                 do {
3285                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3286                                                 &wsize, &credits);
3287                         if (rc)
3288                                 goto fail;
3289
3290                         if (wsize < wdata->bytes) {
3291                                 add_credits_and_wake_if(server, &credits, 0);
3292                                 msleep(1000);
3293                         }
3294                 } while (wsize < wdata->bytes);
3295                 wdata->credits = credits;
3296
3297                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3298
3299                 if (!rc) {
3300                         if (wdata->cfile->invalidHandle)
3301                                 rc = -EAGAIN;
3302                         else {
3303 #ifdef CONFIG_CIFS_SMB_DIRECT
3304                                 if (wdata->mr) {
3305                                         wdata->mr->need_invalidate = true;
3306                                         smbd_deregister_mr(wdata->mr);
3307                                         wdata->mr = NULL;
3308                                 }
3309 #endif
3310                                 rc = server->ops->async_writev(wdata,
3311                                         cifs_uncached_writedata_release);
3312                         }
3313                 }
3314
3315                 /* If the write was successfully sent, we are done */
3316                 if (!rc) {
3317                         list_add_tail(&wdata->list, wdata_list);
3318                         return 0;
3319                 }
3320
3321                 /* Roll back credits and retry if needed */
3322                 add_credits_and_wake_if(server, &wdata->credits, 0);
3323         } while (rc == -EAGAIN);
3324
3325 fail:
3326         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3327         return rc;
3328 }
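
/*
 * The credit-wait pattern used above, as a stand-alone sketch ('granted' and
 * 'bytes' are hypothetical locals): keep asking for enough credits to cover
 * the whole payload, backing off whenever the grant is too small:
 *
 *	do {
 *		rc = server->ops->wait_mtu_credits(server, bytes,
 *						   &granted, &credits);
 *		if (rc)
 *			return rc;
 *		if (granted < bytes) {
 *			add_credits_and_wake_if(server, &credits, 0);
 *			msleep(1000);	// let the server replenish credits
 *		}
 *	} while (granted < bytes);
 */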
3329
3330 /*
3331  * Select the span of a bvec iterator that we're going to use.  Limit it by
3332  * both the maximum size and the maximum number of segments.
3333  */
3334 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3335                                      size_t max_segs, unsigned int *_nsegs)
3336 {
3337         const struct bio_vec *bvecs = iter->bvec;
3338         unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3339         size_t len, span = 0, n = iter->count;
3340         size_t skip = iter->iov_offset;
3341
3342         if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3343                 return 0;
3344
3345         while (n && ix < nbv && skip) {
3346                 len = bvecs[ix].bv_len;
3347                 if (skip < len)
3348                         break;
3349                 skip -= len;
3350                 n -= len;
3351                 ix++;
3352         }
3353
3354         while (n && ix < nbv) {
3355                 len = min3(n, bvecs[ix].bv_len - skip, max_size);
3356                 span += len;
3357                 max_size -= len;
3358                 nsegs++;
3359                 ix++;
3360                 if (max_size == 0 || nsegs >= max_segs)
3361                         break;
3362                 skip = 0;
3363                 n -= len;
3364         }
3365
3366         *_nsegs = nsegs;
3367         return span;
3368 }
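
/*
 * Worked example for cifs_limit_bvec_subset() with a hypothetical iterator:
 * bvecs of 4KiB, 4KiB and 8KiB with iov_offset = 1KiB (count = 15KiB),
 * max_size = 6KiB, max_segs = 2.  The first loop skips into bvec 0, then
 * the second loop accumulates:
 *
 *	seg 0: min3(15KiB, 4KiB - 1KiB, 6KiB) = 3KiB	// span = 3KiB
 *	seg 1: min3(12KiB, 4KiB, 3KiB)        = 3KiB	// span = 6KiB, stop
 *
 * so the caller issues one 6KiB I/O over two segments, advances the
 * iterator, and asks again.
 */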
3369
3370 static int
3371 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3372                      struct cifsFileInfo *open_file,
3373                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3374                      struct cifs_aio_ctx *ctx)
3375 {
3376         int rc = 0;
3377         size_t cur_len, max_len;
3378         struct cifs_writedata *wdata;
3379         pid_t pid;
3380         struct TCP_Server_Info *server;
3381         unsigned int xid, max_segs = INT_MAX;
3382
3383         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3384                 pid = open_file->pid;
3385         else
3386                 pid = current->tgid;
3387
3388         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3389         xid = get_xid();
3390
3391 #ifdef CONFIG_CIFS_SMB_DIRECT
3392         if (server->smbd_conn)
3393                 max_segs = server->smbd_conn->max_frmr_depth;
3394 #endif
3395
3396         do {
3397                 struct cifs_credits credits_on_stack;
3398                 struct cifs_credits *credits = &credits_on_stack;
3399                 unsigned int wsize, nsegs = 0;
3400
3401                 if (signal_pending(current)) {
3402                         rc = -EINTR;
3403                         break;
3404                 }
3405
3406                 if (open_file->invalidHandle) {
3407                         rc = cifs_reopen_file(open_file, false);
3408                         if (rc == -EAGAIN)
3409                                 continue;
3410                         else if (rc)
3411                                 break;
3412                 }
3413
3414                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3415                                                    &wsize, credits);
3416                 if (rc)
3417                         break;
3418
3419                 max_len = min_t(const size_t, len, wsize);
3420                 if (!max_len) {
3421                         rc = -EAGAIN;
3422                         add_credits_and_wake_if(server, credits, 0);
3423                         break;
3424                 }
3425
3426                 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3427                 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3428                          cur_len, max_len, nsegs, from->nr_segs, max_segs);
3429                 if (cur_len == 0) {
3430                         rc = -EIO;
3431                         add_credits_and_wake_if(server, credits, 0);
3432                         break;
3433                 }
3434
3435                 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3436                 if (!wdata) {
3437                         rc = -ENOMEM;
3438                         add_credits_and_wake_if(server, credits, 0);
3439                         break;
3440                 }
3441
3442                 wdata->sync_mode = WB_SYNC_ALL;
3443                 wdata->offset   = (__u64)fpos;
3444                 wdata->cfile    = cifsFileInfo_get(open_file);
3445                 wdata->server   = server;
3446                 wdata->pid      = pid;
3447                 wdata->bytes    = cur_len;
3448                 wdata->credits  = credits_on_stack;
3449                 wdata->iter     = *from;
3450                 wdata->ctx      = ctx;
3451                 kref_get(&ctx->refcount);
3452
3453                 iov_iter_truncate(&wdata->iter, cur_len);
3454
3455                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3456
3457                 if (!rc) {
3458                         if (wdata->cfile->invalidHandle)
3459                                 rc = -EAGAIN;
3460                         else
3461                                 rc = server->ops->async_writev(wdata,
3462                                         cifs_uncached_writedata_release);
3463                 }
3464
3465                 if (rc) {
3466                         add_credits_and_wake_if(server, &wdata->credits, 0);
3467                         kref_put(&wdata->refcount,
3468                                  cifs_uncached_writedata_release);
3469                         if (rc == -EAGAIN)
3470                                 continue;
3471                         break;
3472                 }
3473
3474                 list_add_tail(&wdata->list, wdata_list);
3475                 iov_iter_advance(from, cur_len);
3476                 fpos += cur_len;
3477                 len -= cur_len;
3478         } while (len > 0);
3479
3480         free_xid(xid);
3481         return rc;
3482 }
3483
3484 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3485 {
3486         struct cifs_writedata *wdata, *tmp;
3487         struct cifs_tcon *tcon;
3488         struct cifs_sb_info *cifs_sb;
3489         struct dentry *dentry = ctx->cfile->dentry;
3490         ssize_t rc;
3491
3492         tcon = tlink_tcon(ctx->cfile->tlink);
3493         cifs_sb = CIFS_SB(dentry->d_sb);
3494
3495         mutex_lock(&ctx->aio_mutex);
3496
3497         if (list_empty(&ctx->list)) {
3498                 mutex_unlock(&ctx->aio_mutex);
3499                 return;
3500         }
3501
3502         rc = ctx->rc;
3503         /*
3504          * Wait for and collect replies for any successful sends in order of
3505          * increasing offset. Once an error is hit, then return without waiting
3506          * for any more replies.
3507          */
3508 restart_loop:
3509         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3510                 if (!rc) {
3511                         if (!try_wait_for_completion(&wdata->done)) {
3512                                 mutex_unlock(&ctx->aio_mutex);
3513                                 return;
3514                         }
3515
3516                         if (wdata->result)
3517                                 rc = wdata->result;
3518                         else
3519                                 ctx->total_len += wdata->bytes;
3520
3521                         /* resend call if it's a retryable error */
3522                         if (rc == -EAGAIN) {
3523                                 struct list_head tmp_list;
3524                                 struct iov_iter tmp_from = ctx->iter;
3525
3526                                 INIT_LIST_HEAD(&tmp_list);
3527                                 list_del_init(&wdata->list);
3528
3529                                 if (ctx->direct_io)
3530                                         rc = cifs_resend_wdata(
3531                                                 wdata, &tmp_list, ctx);
3532                                 else {
3533                                         iov_iter_advance(&tmp_from,
3534                                                  wdata->offset - ctx->pos);
3535
3536                                         rc = cifs_write_from_iter(wdata->offset,
3537                                                 wdata->bytes, &tmp_from,
3538                                                 ctx->cfile, cifs_sb, &tmp_list,
3539                                                 ctx);
3540
3541                                         kref_put(&wdata->refcount,
3542                                                 cifs_uncached_writedata_release);
3543                                 }
3544
3545                                 list_splice(&tmp_list, &ctx->list);
3546                                 goto restart_loop;
3547                         }
3548                 }
3549                 list_del_init(&wdata->list);
3550                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3551         }
3552
3553         cifs_stats_bytes_written(tcon, ctx->total_len);
3554         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3555
3556         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3557
3558         mutex_unlock(&ctx->aio_mutex);
3559
3560         if (ctx->iocb && ctx->iocb->ki_complete)
3561                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3562         else
3563                 complete(&ctx->done);
3564 }
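
/*
 * Shape of the collection loop above, reduced to a sketch (illustrative
 * only): completions are consumed in list (offset) order without blocking.
 * If the next request isn't done yet we return, and the completion handler
 * of that request calls back in here to resume the harvest:
 *
 *	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
 *		if (!try_wait_for_completion(&wdata->done))
 *			return;		// resumed later from the handler
 *		// harvest wdata->result, resend on -EAGAIN, drop the ref
 *	}
 */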
3565
3566 static ssize_t __cifs_writev(
3567         struct kiocb *iocb, struct iov_iter *from, bool direct)
3568 {
3569         struct file *file = iocb->ki_filp;
3570         ssize_t total_written = 0;
3571         struct cifsFileInfo *cfile;
3572         struct cifs_tcon *tcon;
3573         struct cifs_sb_info *cifs_sb;
3574         struct cifs_aio_ctx *ctx;
3575         int rc;
3576
3577         rc = generic_write_checks(iocb, from);
3578         if (rc <= 0)
3579                 return rc;
3580
3581         cifs_sb = CIFS_FILE_SB(file);
3582         cfile = file->private_data;
3583         tcon = tlink_tcon(cfile->tlink);
3584
3585         if (!tcon->ses->server->ops->async_writev)
3586                 return -ENOSYS;
3587
3588         ctx = cifs_aio_ctx_alloc();
3589         if (!ctx)
3590                 return -ENOMEM;
3591
3592         ctx->cfile = cifsFileInfo_get(cfile);
3593
3594         if (!is_sync_kiocb(iocb))
3595                 ctx->iocb = iocb;
3596
3597         ctx->pos = iocb->ki_pos;
3598         ctx->direct_io = direct;
3599         ctx->nr_pinned_pages = 0;
3600
3601         if (user_backed_iter(from)) {
3602                 /*
3603                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3604                  * they contain references to the calling process's virtual
3605                  * memory layout which won't be available in an async worker
3606                  * thread.  This also takes a pin on every folio involved.
3607                  */
3608                 rc = netfs_extract_user_iter(from, iov_iter_count(from),
3609                                              &ctx->iter, 0);
3610                 if (rc < 0) {
3611                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3612                         return rc;
3613                 }
3614
3615                 ctx->nr_pinned_pages = rc;
3616                 ctx->bv = (void *)ctx->iter.bvec;
3617                 ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3618         } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3619                    !is_sync_kiocb(iocb)) {
3620                 /*
3621                  * If the op is asynchronous, we need to copy the list attached
3622                  * to a BVEC/KVEC-type iterator, but we assume that the storage
3623                  * will be pinned by the caller; in any case, we may or may not
3624                  * be able to pin the pages, so we don't try.
3625                  */
3626                 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3627                 if (!ctx->bv) {
3628                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3629                         return -ENOMEM;
3630                 }
3631         } else {
3632                 /*
3633                  * Otherwise, we just pass the iterator down as-is and rely on
3634                  * the caller to make sure the pages referred to by the
3635                  * iterator don't evaporate.
3636                  */
3637                 ctx->iter = *from;
3638         }
3639
3640         ctx->len = iov_iter_count(&ctx->iter);
3641
3642         /* grab a lock here, since write response handlers can access ctx */
3643         mutex_lock(&ctx->aio_mutex);
3644
3645         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3646                                   cfile, cifs_sb, &ctx->list, ctx);
3647
3648         /*
3649          * If at least one write was successfully sent, then discard any rc
3650          * value from the later writes. If the other writes succeed, then
3651          * we'll end up returning whatever was written. If one fails, then
3652          * we'll get a new rc value from that.
3653          */
3654         if (!list_empty(&ctx->list))
3655                 rc = 0;
3656
3657         mutex_unlock(&ctx->aio_mutex);
3658
3659         if (rc) {
3660                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3661                 return rc;
3662         }
3663
3664         if (!is_sync_kiocb(iocb)) {
3665                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3666                 return -EIOCBQUEUED;
3667         }
3668
3669         rc = wait_for_completion_killable(&ctx->done);
3670         if (rc) {
3671                 mutex_lock(&ctx->aio_mutex);
3672                 ctx->rc = rc = -EINTR;
3673                 total_written = ctx->total_len;
3674                 mutex_unlock(&ctx->aio_mutex);
3675         } else {
3676                 rc = ctx->rc;
3677                 total_written = ctx->total_len;
3678         }
3679
3680         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3681
3682         if (unlikely(!total_written))
3683                 return rc;
3684
3685         iocb->ki_pos += total_written;
3686         return total_written;
3687 }
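
/*
 * Summary sketch of the iterator handling in __cifs_writev() (and mirrored
 * in __cifs_readv()), illustrative only: what must happen to the caller's
 * iov_iter before the I/O is handed to async workers:
 *
 *	if (user_backed_iter(iter))
 *		netfs_extract_user_iter(...);	// pin pages, build a bvec
 *	else if ((bvec || kvec) && !is_sync_kiocb(iocb))
 *		dup_iter(...);			// copy the segment list only
 *	else
 *		ctx->iter = *iter;		// caller keeps pages alive
 */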
3688
3689 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3690 {
3691         struct file *file = iocb->ki_filp;
3692
3693         cifs_revalidate_mapping(file->f_inode);
3694         return __cifs_writev(iocb, from, true);
3695 }
3696
3697 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3698 {
3699         return __cifs_writev(iocb, from, false);
3700 }
3701
3702 static ssize_t
3703 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3704 {
3705         struct file *file = iocb->ki_filp;
3706         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3707         struct inode *inode = file->f_mapping->host;
3708         struct cifsInodeInfo *cinode = CIFS_I(inode);
3709         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3710         ssize_t rc;
3711
3712         inode_lock(inode);
3713         /*
3714          * We need to hold the sem to be sure nobody modifies lock list
3715          * with a brlock that prevents writing.
3716          */
3717         down_read(&cinode->lock_sem);
3718
3719         rc = generic_write_checks(iocb, from);
3720         if (rc <= 0)
3721                 goto out;
3722
3723         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3724                                      server->vals->exclusive_lock_type, 0,
3725                                      NULL, CIFS_WRITE_OP))
3726                 rc = __generic_file_write_iter(iocb, from);
3727         else
3728                 rc = -EACCES;
3729 out:
3730         up_read(&cinode->lock_sem);
3731         inode_unlock(inode);
3732
3733         if (rc > 0)
3734                 rc = generic_write_sync(iocb, rc);
3735         return rc;
3736 }
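
/*
 * A minimal sketch of the lock ordering used above (assumptions as in this
 * file): the inode lock is taken before lock_sem, and lock_sem is only
 * read-held because we check for brlock conflicts without adding any:
 *
 *	inode_lock(inode);
 *	down_read(&cinode->lock_sem);
 *	if (!cifs_find_lock_conflict(...))
 *		rc = __generic_file_write_iter(iocb, from);
 *	up_read(&cinode->lock_sem);
 *	inode_unlock(inode);
 */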
3737
3738 ssize_t
3739 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3740 {
3741         struct inode *inode = file_inode(iocb->ki_filp);
3742         struct cifsInodeInfo *cinode = CIFS_I(inode);
3743         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3744         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3745                                                 iocb->ki_filp->private_data;
3746         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3747         ssize_t written;
3748
3749         written = cifs_get_writer(cinode);
3750         if (written)
3751                 return written;
3752
3753         if (CIFS_CACHE_WRITE(cinode)) {
3754                 if (cap_unix(tcon->ses) &&
3755                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3756                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3757                         written = generic_file_write_iter(iocb, from);
3758                         goto out;
3759                 }
3760                 written = cifs_writev(iocb, from);
3761                 goto out;
3762         }
3763         /*
3764          * For non-oplocked files in strict cache mode we need to write the data
3765          * to the server exactly from pos to pos+len-1 rather than flush all
3766          * affected pages, because that may cause an error with mandatory locks
3767          * on these pages but not on the region from pos to pos+len-1.
3768          */
3769         written = cifs_user_writev(iocb, from);
3770         if (CIFS_CACHE_READ(cinode)) {
3771                 /*
3772                  * We have read level caching and we have just sent a write
3773                  * request to the server thus making data in the cache stale.
3774                  * Zap the cache and set oplock/lease level to NONE to avoid
3775                  * reading stale data from the cache. All subsequent read
3776                  * operations will read new data from the server.
3777                  */
3778                 cifs_zap_mapping(inode);
3779                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3780                          inode);
3781                 cinode->oplock = 0;
3782         }
3783 out:
3784         cifs_put_writer(cinode);
3785         return written;
3786 }
3787
3788 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3789 {
3790         struct cifs_readdata *rdata;
3791
3792         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3793         if (rdata) {
3794                 kref_init(&rdata->refcount);
3795                 INIT_LIST_HEAD(&rdata->list);
3796                 init_completion(&rdata->done);
3797                 INIT_WORK(&rdata->work, complete);
3798         }
3799
3800         return rdata;
3801 }
3802
3803 void
3804 cifs_readdata_release(struct kref *refcount)
3805 {
3806         struct cifs_readdata *rdata = container_of(refcount,
3807                                         struct cifs_readdata, refcount);
3808
3809         if (rdata->ctx)
3810                 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3811 #ifdef CONFIG_CIFS_SMB_DIRECT
3812         if (rdata->mr) {
3813                 smbd_deregister_mr(rdata->mr);
3814                 rdata->mr = NULL;
3815         }
3816 #endif
3817         if (rdata->cfile)
3818                 cifsFileInfo_put(rdata->cfile);
3819
3820         kfree(rdata);
3821 }
3822
3823 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3824
3825 static void
3826 cifs_uncached_readv_complete(struct work_struct *work)
3827 {
3828         struct cifs_readdata *rdata = container_of(work,
3829                                                 struct cifs_readdata, work);
3830
3831         complete(&rdata->done);
3832         collect_uncached_read_data(rdata->ctx);
3833         /* the below call can possibly free the last ref to aio ctx */
3834         kref_put(&rdata->refcount, cifs_readdata_release);
3835 }
3836
3837 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3838                         struct list_head *rdata_list,
3839                         struct cifs_aio_ctx *ctx)
3840 {
3841         unsigned int rsize;
3842         struct cifs_credits credits;
3843         int rc;
3844         struct TCP_Server_Info *server;
3845
3846         /* XXX: should we pick a new channel here? */
3847         server = rdata->server;
3848
3849         do {
3850                 if (rdata->cfile->invalidHandle) {
3851                         rc = cifs_reopen_file(rdata->cfile, true);
3852                         if (rc == -EAGAIN)
3853                                 continue;
3854                         else if (rc)
3855                                 break;
3856                 }
3857
3858                 /*
3859                  * Wait for credits to resend this rdata.
3860                  * Note: we are attempting to resend the whole rdata, not
3861                  * in segments.
3862                  */
3863                 do {
3864                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3865                                                 &rsize, &credits);
3866
3867                         if (rc)
3868                                 goto fail;
3869
3870                         if (rsize < rdata->bytes) {
3871                                 add_credits_and_wake_if(server, &credits, 0);
3872                                 msleep(1000);
3873                         }
3874                 } while (rsize < rdata->bytes);
3875                 rdata->credits = credits;
3876
3877                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3878                 if (!rc) {
3879                         if (rdata->cfile->invalidHandle)
3880                                 rc = -EAGAIN;
3881                         else {
3882 #ifdef CONFIG_CIFS_SMB_DIRECT
3883                                 if (rdata->mr) {
3884                                         rdata->mr->need_invalidate = true;
3885                                         smbd_deregister_mr(rdata->mr);
3886                                         rdata->mr = NULL;
3887                                 }
3888 #endif
3889                                 rc = server->ops->async_readv(rdata);
3890                         }
3891                 }
3892
3893                 /* If the read was successfully sent, we are done */
3894                 if (!rc) {
3895                         /* Add to aio pending list */
3896                         list_add_tail(&rdata->list, rdata_list);
3897                         return 0;
3898                 }
3899
3900                 /* Roll back credits and retry if needed */
3901                 add_credits_and_wake_if(server, &rdata->credits, 0);
3902         } while (rc == -EAGAIN);
3903
3904 fail:
3905         kref_put(&rdata->refcount, cifs_readdata_release);
3906         return rc;
3907 }
3908
3909 static int
3910 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3911                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3912                      struct cifs_aio_ctx *ctx)
3913 {
3914         struct cifs_readdata *rdata;
3915         unsigned int rsize, nsegs, max_segs = INT_MAX;
3916         struct cifs_credits credits_on_stack;
3917         struct cifs_credits *credits = &credits_on_stack;
3918         size_t cur_len, max_len;
3919         int rc;
3920         pid_t pid;
3921         struct TCP_Server_Info *server;
3922
3923         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3924
3925 #ifdef CONFIG_CIFS_SMB_DIRECT
3926         if (server->smbd_conn)
3927                 max_segs = server->smbd_conn->max_frmr_depth;
3928 #endif
3929
3930         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3931                 pid = open_file->pid;
3932         else
3933                 pid = current->tgid;
3934
3935         do {
3936                 if (open_file->invalidHandle) {
3937                         rc = cifs_reopen_file(open_file, true);
3938                         if (rc == -EAGAIN)
3939                                 continue;
3940                         else if (rc)
3941                                 break;
3942                 }
3943
3944                 if (cifs_sb->ctx->rsize == 0)
3945                         cifs_sb->ctx->rsize =
3946                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3947                                                              cifs_sb->ctx);
3948
3949                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3950                                                    &rsize, credits);
3951                 if (rc)
3952                         break;
3953
3954                 max_len = min_t(size_t, len, rsize);
3955
3956                 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3957                                                  max_segs, &nsegs);
3958                 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3959                          cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3960                 if (cur_len == 0) {
3961                         rc = -EIO;
3962                         add_credits_and_wake_if(server, credits, 0);
3963                         break;
3964                 }
3965
3966                 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3967                 if (!rdata) {
3968                         add_credits_and_wake_if(server, credits, 0);
3969                         rc = -ENOMEM;
3970                         break;
3971                 }
3972
3973                 rdata->server   = server;
3974                 rdata->cfile    = cifsFileInfo_get(open_file);
3975                 rdata->offset   = fpos;
3976                 rdata->bytes    = cur_len;
3977                 rdata->pid      = pid;
3978                 rdata->credits  = credits_on_stack;
3979                 rdata->ctx      = ctx;
3980                 kref_get(&ctx->refcount);
3981
3982                 rdata->iter     = ctx->iter;
3983                 iov_iter_truncate(&rdata->iter, cur_len);
3984
3985                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3986
3987                 if (!rc) {
3988                         if (rdata->cfile->invalidHandle)
3989                                 rc = -EAGAIN;
3990                         else
3991                                 rc = server->ops->async_readv(rdata);
3992                 }
3993
3994                 if (rc) {
3995                         add_credits_and_wake_if(server, &rdata->credits, 0);
3996                         kref_put(&rdata->refcount, cifs_readdata_release);
3997                         if (rc == -EAGAIN)
3998                                 continue;
3999                         break;
4000                 }
4001
4002                 list_add_tail(&rdata->list, rdata_list);
4003                 iov_iter_advance(&ctx->iter, cur_len);
4004                 fpos += cur_len;
4005                 len -= cur_len;
4006         } while (len > 0);
4007
4008         return rc;
4009 }
4010
4011 static void
4012 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4013 {
4014         struct cifs_readdata *rdata, *tmp;
4015         struct cifs_sb_info *cifs_sb;
4016         int rc;
4017
4018         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4019
4020         mutex_lock(&ctx->aio_mutex);
4021
4022         if (list_empty(&ctx->list)) {
4023                 mutex_unlock(&ctx->aio_mutex);
4024                 return;
4025         }
4026
4027         rc = ctx->rc;
4028         /* the loop below should proceed in the order of increasing offsets */
4029 again:
4030         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4031                 if (!rc) {
4032                         if (!try_wait_for_completion(&rdata->done)) {
4033                                 mutex_unlock(&ctx->aio_mutex);
4034                                 return;
4035                         }
4036
4037                         if (rdata->result == -EAGAIN) {
4038                                 /* resend call if it's a retryable error */
4039                                 struct list_head tmp_list;
4040                                 unsigned int got_bytes = rdata->got_bytes;
4041
4042                                 list_del_init(&rdata->list);
4043                                 INIT_LIST_HEAD(&tmp_list);
4044
4045                                 if (ctx->direct_io) {
4046                                         /*
4047                                          * Re-use rdata as this is a
4048                                          * direct I/O
4049                                          */
4050                                         rc = cifs_resend_rdata(
4051                                                 rdata,
4052                                                 &tmp_list, ctx);
4053                                 } else {
4054                                         rc = cifs_send_async_read(
4055                                                 rdata->offset + got_bytes,
4056                                                 rdata->bytes - got_bytes,
4057                                                 rdata->cfile, cifs_sb,
4058                                                 &tmp_list, ctx);
4059
4060                                         kref_put(&rdata->refcount,
4061                                                 cifs_readdata_release);
4062                                 }
4063
4064                                 list_splice(&tmp_list, &ctx->list);
4065
4066                                 goto again;
4067                         } else if (rdata->result)
4068                                 rc = rdata->result;
4069
4070                         /* if there was a short read -- discard anything left */
4071                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4072                                 rc = -ENODATA;
4073
4074                         ctx->total_len += rdata->got_bytes;
4075                 }
4076                 list_del_init(&rdata->list);
4077                 kref_put(&rdata->refcount, cifs_readdata_release);
4078         }
4079
4080         /* mask nodata case */
4081         if (rc == -ENODATA)
4082                 rc = 0;
4083
4084         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4085
4086         mutex_unlock(&ctx->aio_mutex);
4087
4088         if (ctx->iocb && ctx->iocb->ki_complete)
4089                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4090         else
4091                 complete(&ctx->done);
4092 }
4093
4094 static ssize_t __cifs_readv(
4095         struct kiocb *iocb, struct iov_iter *to, bool direct)
4096 {
4097         size_t len;
4098         struct file *file = iocb->ki_filp;
4099         struct cifs_sb_info *cifs_sb;
4100         struct cifsFileInfo *cfile;
4101         struct cifs_tcon *tcon;
4102         ssize_t rc, total_read = 0;
4103         loff_t offset = iocb->ki_pos;
4104         struct cifs_aio_ctx *ctx;
4105
4106         len = iov_iter_count(to);
4107         if (!len)
4108                 return 0;
4109
4110         cifs_sb = CIFS_FILE_SB(file);
4111         cfile = file->private_data;
4112         tcon = tlink_tcon(cfile->tlink);
4113
4114         if (!tcon->ses->server->ops->async_readv)
4115                 return -ENOSYS;
4116
4117         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4118                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4119
4120         ctx = cifs_aio_ctx_alloc();
4121         if (!ctx)
4122                 return -ENOMEM;
4123
4124         ctx->pos        = offset;
4125         ctx->direct_io  = direct;
4126         ctx->len        = len;
4127         ctx->cfile      = cifsFileInfo_get(cfile);
4128         ctx->nr_pinned_pages = 0;
4129
4130         if (!is_sync_kiocb(iocb))
4131                 ctx->iocb = iocb;
4132
4133         if (user_backed_iter(to)) {
4134                 /*
4135                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4136                  * they contain references to the calling process's virtual
4137                  * memory layout which won't be available in an async worker
4138                  * thread.  This also takes a pin on every folio involved.
4139                  */
4140                 rc = netfs_extract_user_iter(to, iov_iter_count(to),
4141                                              &ctx->iter, 0);
4142                 if (rc < 0) {
4143                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4144                         return rc;
4145                 }
4146
4147                 ctx->nr_pinned_pages = rc;
4148                 ctx->bv = (void *)ctx->iter.bvec;
4149                 ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4150                 ctx->should_dirty = true;
4151         } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4152                    !is_sync_kiocb(iocb)) {
4153                 /*
4154                  * If the op is asynchronous, we need to copy the list attached
4155                  * to a BVEC/KVEC-type iterator, but we assume that the storage
4156                  * will be retained by the caller; in any case, we may or may
4157                  * not be able to pin the pages, so we don't try.
4158                  */
4159                 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4160                 if (!ctx->bv) {
4161                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4162                         return -ENOMEM;
4163                 }
4164         } else {
4165                 /*
4166                  * Otherwise, we just pass the iterator down as-is and rely on
4167                  * the caller to make sure the pages referred to by the
4168                  * iterator don't evaporate.
4169                  */
4170                 ctx->iter = *to;
4171         }
4172
4173         if (direct) {
4174                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4175                                                   offset, offset + len - 1);
4176                 if (rc) {
4177                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4178                         return -EAGAIN;
4179                 }
4180         }
4181
4182         /* grab a lock here because read response handlers can access ctx */
4183         mutex_lock(&ctx->aio_mutex);
4184
4185         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4186
4187         /* if at least one read request was sent successfully, reset rc */
4188         if (!list_empty(&ctx->list))
4189                 rc = 0;
4190
4191         mutex_unlock(&ctx->aio_mutex);
4192
4193         if (rc) {
4194                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4195                 return rc;
4196         }
4197
4198         if (!is_sync_kiocb(iocb)) {
4199                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4200                 return -EIOCBQUEUED;
4201         }
4202
4203         rc = wait_for_completion_killable(&ctx->done);
4204         if (rc) {
4205                 mutex_lock(&ctx->aio_mutex);
4206                 ctx->rc = rc = -EINTR;
4207                 total_read = ctx->total_len;
4208                 mutex_unlock(&ctx->aio_mutex);
4209         } else {
4210                 rc = ctx->rc;
4211                 total_read = ctx->total_len;
4212         }
4213
4214         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4215
4216         if (total_read) {
4217                 iocb->ki_pos += total_read;
4218                 return total_read;
4219         }
4220         return rc;
4221 }
4222
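     /*
      * read_iter entry points: cifs_direct_readv performs direct I/O
      * (bypassing the page cache), cifs_user_readv the plain uncached
      * read.  Both are thin wrappers around __cifs_readv().
      */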
4223 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4224 {
4225         return __cifs_readv(iocb, to, true);
4226 }
4227
4228 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4229 {
4230         return __cifs_readv(iocb, to, false);
4231 }
4232
4233 ssize_t
4234 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4235 {
4236         struct inode *inode = file_inode(iocb->ki_filp);
4237         struct cifsInodeInfo *cinode = CIFS_I(inode);
4238         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4239         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4240                                                 iocb->ki_filp->private_data;
4241         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4242         int rc = -EACCES;
4243
4244         /*
4245          * In strict cache mode we need to read from the server all the time
4246          * if we don't have a level II oplock, because the server can delay
4247          * the mtime change - so we can't make a decision about invalidating
4248          * the inode. We can also fail at reading pages if there are
4249          * mandatory locks on pages affected by this read but not on the
4250          * region from pos to pos+len-1.
4251          */
4252         if (!CIFS_CACHE_READ(cinode))
4253                 return cifs_user_readv(iocb, to);
4254
4255         if (cap_unix(tcon->ses) &&
4256             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4257             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4258                 return generic_file_read_iter(iocb, to);
4259
4260         /*
4261          * We need to hold the sem to be sure nobody modifies lock list
4262          * with a brlock that prevents reading.
4263          */
4264         down_read(&cinode->lock_sem);
4265         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4266                                      tcon->ses->server->vals->shared_lock_type,
4267                                      0, NULL, CIFS_READ_OP))
4268                 rc = generic_file_read_iter(iocb, to);
4269         up_read(&cinode->lock_sem);
4270         return rc;
4271 }
4272
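     /*
      * Synchronous read helper: issue rsize-sized sync_read calls until
      * the request is satisfied, an error occurs or the server returns no
      * more data.  Used by cifs_readpage_worker() below.
      */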
4273 static ssize_t
4274 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4275 {
4276         int rc = -EACCES;
4277         unsigned int bytes_read = 0;
4278         unsigned int total_read;
4279         unsigned int current_read_size;
4280         unsigned int rsize;
4281         struct cifs_sb_info *cifs_sb;
4282         struct cifs_tcon *tcon;
4283         struct TCP_Server_Info *server;
4284         unsigned int xid;
4285         char *cur_offset;
4286         struct cifsFileInfo *open_file;
4287         struct cifs_io_parms io_parms = {0};
4288         int buf_type = CIFS_NO_BUFFER;
4289         __u32 pid;
4290
4291         xid = get_xid();
4292         cifs_sb = CIFS_FILE_SB(file);
4293
4294         /* FIXME: set up handlers for larger reads and/or convert to async */
4295         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4296
4297         if (file->private_data == NULL) {
4298                 rc = -EBADF;
4299                 free_xid(xid);
4300                 return rc;
4301         }
4302         open_file = file->private_data;
4303         tcon = tlink_tcon(open_file->tlink);
4304         server = cifs_pick_channel(tcon->ses);
4305
4306         if (!server->ops->sync_read) {
4307                 free_xid(xid);
4308                 return -ENOSYS;
4309         }
4310
4311         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4312                 pid = open_file->pid;
4313         else
4314                 pid = current->tgid;
4315
4316         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4317                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4318
4319         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4320              total_read += bytes_read, cur_offset += bytes_read) {
4321                 do {
4322                         current_read_size = min_t(uint, read_size - total_read,
4323                                                   rsize);
4324                         /*
4325                          * For Windows ME and 9x we do not want to request
4326                          * more than was negotiated since the server will
4327                          * refuse the read otherwise.
4328                          */
4329                         if (!(tcon->ses->capabilities &
4330                                 tcon->ses->server->vals->cap_large_files)) {
4331                                 current_read_size = min_t(uint,
4332                                         current_read_size, CIFSMaxBufSize);
4333                         }
4334                         if (open_file->invalidHandle) {
4335                                 rc = cifs_reopen_file(open_file, true);
4336                                 if (rc != 0)
4337                                         break;
4338                         }
4339                         io_parms.pid = pid;
4340                         io_parms.tcon = tcon;
4341                         io_parms.offset = *offset;
4342                         io_parms.length = current_read_size;
4343                         io_parms.server = server;
4344                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4345                                                     &bytes_read, &cur_offset,
4346                                                     &buf_type);
4347                 } while (rc == -EAGAIN);
4348
4349                 if (rc || (bytes_read == 0)) {
4350                         if (total_read) {
4351                                 break;
4352                         } else {
4353                                 free_xid(xid);
4354                                 return rc;
4355                         }
4356                 } else {
4357                         cifs_stats_bytes_read(tcon, total_read);
4358                         *offset += bytes_read;
4359                 }
4360         }
4361         free_xid(xid);
4362         return total_read;
4363 }
4364
4365 /*
4366  * If the page is mmap'ed into a process's page tables, then we need to make
4367  * sure that it doesn't change while being written back.
4368  */
4369 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4370 {
4371         struct folio *folio = page_folio(vmf->page);
4372
4373         /* Wait for the folio to be written to the cache before we allow it to
4374          * be modified.  We then assume the entire folio will need writing back.
4375          */
4376 #ifdef CONFIG_CIFS_FSCACHE
4377         if (folio_test_fscache(folio) &&
4378             folio_wait_fscache_killable(folio) < 0)
4379                 return VM_FAULT_RETRY;
4380 #endif
4381
4382         folio_wait_writeback(folio);
4383
4384         if (folio_lock_killable(folio) < 0)
4385                 return VM_FAULT_RETRY;
4386         return VM_FAULT_LOCKED;
4387 }
4388
4389 static const struct vm_operations_struct cifs_file_vm_ops = {
4390         .fault = filemap_fault,
4391         .map_pages = filemap_map_pages,
4392         .page_mkwrite = cifs_page_mkwrite,
4393 };
4394
4395 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4396 {
4397         int xid, rc = 0;
4398         struct inode *inode = file_inode(file);
4399
4400         xid = get_xid();
4401
4402         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4403                 rc = cifs_zap_mapping(inode);
4404         if (!rc)
4405                 rc = generic_file_mmap(file, vma);
4406         if (!rc)
4407                 vma->vm_ops = &cifs_file_vm_ops;
4408
4409         free_xid(xid);
4410         return rc;
4411 }
4412
4413 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4414 {
4415         int rc, xid;
4416
4417         xid = get_xid();
4418
4419         rc = cifs_revalidate_file(file);
4420         if (rc)
4421                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4422                          rc);
4423         if (!rc)
4424                 rc = generic_file_mmap(file, vma);
4425         if (!rc)
4426                 vma->vm_ops = &cifs_file_vm_ops;
4427
4428         free_xid(xid);
4429         return rc;
4430 }
4431
4432 /*
4433  * Unlock a bunch of folios in the pagecache.
4434  */
4435 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4436 {
4437         struct folio *folio;
4438         XA_STATE(xas, &mapping->i_pages, first);
4439
4440         rcu_read_lock();
4441         xas_for_each(&xas, folio, last) {
4442                 folio_unlock(folio);
4443         }
4444         rcu_read_unlock();
4445 }
4446
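     /*
      * Completion of a readahead batch: write good data through to the
      * cache, zero any unfilled tail of the buffer, then mark the folios
      * uptodate (if the read was good) and unlock them.
      */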
4447 static void cifs_readahead_complete(struct work_struct *work)
4448 {
4449         struct cifs_readdata *rdata = container_of(work,
4450                                                    struct cifs_readdata, work);
4451         struct folio *folio;
4452         pgoff_t last;
4453         bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4454
4455         XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4456
4457         if (good)
4458                 cifs_readahead_to_fscache(rdata->mapping->host,
4459                                           rdata->offset, rdata->bytes);
4460
4461         if (iov_iter_count(&rdata->iter) > 0)
4462                 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4463
4464         last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4465
4466         rcu_read_lock();
4467         xas_for_each(&xas, folio, last) {
4468                 if (good) {
4469                         flush_dcache_folio(folio);
4470                         folio_mark_uptodate(folio);
4471                 }
4472                 folio_unlock(folio);
4473         }
4474         rcu_read_unlock();
4475
4476         kref_put(&rdata->refcount, cifs_readdata_release);
4477 }
4478
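     /*
      * Readahead: carve the window up into rsize-sized read RPCs, letting
      * ranges that fscache already holds be read from the cache instead
      * of the server.
      */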
4479 static void cifs_readahead(struct readahead_control *ractl)
4480 {
4481         struct cifsFileInfo *open_file = ractl->file->private_data;
4482         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4483         struct TCP_Server_Info *server;
4484         unsigned int xid, nr_pages, cache_nr_pages = 0;
4485         unsigned int ra_pages;
4486         pgoff_t next_cached = ULONG_MAX, ra_index;
4487         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4488                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4489         bool check_cache = caching;
4490         pid_t pid;
4491         int rc = 0;
4492
4493         /* Note that readahead_count() lags behind our dequeuing of pages from
4494          * the ractl, so we have to keep track for ourselves.
4495          */
4496         ra_pages = readahead_count(ractl);
4497         ra_index = readahead_index(ractl);
4498
4499         xid = get_xid();
4500
4501         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4502                 pid = open_file->pid;
4503         else
4504                 pid = current->tgid;
4505
4506         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4507
4508         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4509                  __func__, ractl->file, ractl->mapping, ra_pages);
4510
4511         /*
4512          * Chop the readahead request up into rsize-sized read requests.
4513          */
4514         while ((nr_pages = ra_pages)) {
4515                 unsigned int i, rsize;
4516                 struct cifs_readdata *rdata;
4517                 struct cifs_credits credits_on_stack;
4518                 struct cifs_credits *credits = &credits_on_stack;
4519                 struct folio *folio;
4520                 pgoff_t fsize;
4521
4522                 /*
4523                  * Find out if we have anything cached in the range of
4524                  * interest, and if so, where the next chunk of cached data is.
4525                  */
4526                 if (caching) {
4527                         if (check_cache) {
4528                                 rc = cifs_fscache_query_occupancy(
4529                                         ractl->mapping->host, ra_index, nr_pages,
4530                                         &next_cached, &cache_nr_pages);
4531                                 if (rc < 0)
4532                                         caching = false;
4533                                 check_cache = false;
4534                         }
4535
4536                         if (ra_index == next_cached) {
4537                                 /*
4538                                  * TODO: Send a whole batch of pages to be read
4539                                  * by the cache.
4540                                  */
4541                                 folio = readahead_folio(ractl);
4542                                 fsize = folio_nr_pages(folio);
4543                                 ra_pages -= fsize;
4544                                 ra_index += fsize;
4545                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4546                                                                &folio->page) < 0) {
4547                                         /*
4548                                          * TODO: Deal with cache read failure
4549                                          * here, but for the moment, delegate
4550                                          * that to readpage.
4551                                          */
4552                                         caching = false;
4553                                 }
4554                                 folio_unlock(folio);
4555                                 next_cached += fsize;
4556                                 cache_nr_pages -= fsize;
4557                                 if (cache_nr_pages == 0)
4558                                         check_cache = true;
4559                                 continue;
4560                         }
4561                 }
4562
4563                 if (open_file->invalidHandle) {
4564                         rc = cifs_reopen_file(open_file, true);
4565                         if (rc) {
4566                                 if (rc == -EAGAIN)
4567                                         continue;
4568                                 break;
4569                         }
4570                 }
4571
4572                 if (cifs_sb->ctx->rsize == 0)
4573                         cifs_sb->ctx->rsize =
4574                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4575                                                              cifs_sb->ctx);
4576
4577                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4578                                                    &rsize, credits);
4579                 if (rc)
4580                         break;
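                      /*
                       * Clamp the batch to what the granted credits can carry
                       * and, if cached data lies ahead, stop the batch where
                       * the cached region begins so it can be read from
                       * fscache instead.
                       */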
4581                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4582                 if (next_cached != ULONG_MAX)
4583                         nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4584
4585                 /*
4586                  * Give up immediately if rsize is too small to read an entire
4587                  * page. The VFS will fall back to readpage. However, we
4588                  * should never reach this point since we set ra_pages to 0
4589                  * when the rsize is smaller than a cache page.
4590                  */
4591                 if (unlikely(!nr_pages)) {
4592                         add_credits_and_wake_if(server, credits, 0);
4593                         break;
4594                 }
4595
4596                 rdata = cifs_readdata_alloc(cifs_readahead_complete);
4597                 if (!rdata) {
4598                         /* best to give up if we're out of memory */
4599                         add_credits_and_wake_if(server, credits, 0);
4600                         break;
4601                 }
4602
4603                 rdata->offset   = ra_index * PAGE_SIZE;
4604                 rdata->bytes    = nr_pages * PAGE_SIZE;
4605                 rdata->cfile    = cifsFileInfo_get(open_file);
4606                 rdata->server   = server;
4607                 rdata->mapping  = ractl->mapping;
4608                 rdata->pid      = pid;
4609                 rdata->credits  = credits_on_stack;
4610
4611                 for (i = 0; i < nr_pages; i++) {
4612                         if (!readahead_folio(ractl))
4613                                 WARN_ON(1);
4614                 }
4615                 ra_pages -= nr_pages;
4616                 ra_index += nr_pages;
4617
4618                 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4619                                 rdata->offset, rdata->bytes);
4620
4621                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4622                 if (!rc) {
4623                         if (rdata->cfile->invalidHandle)
4624                                 rc = -EAGAIN;
4625                         else
4626                                 rc = server->ops->async_readv(rdata);
4627                 }
4628
4629                 if (rc) {
4630                         add_credits_and_wake_if(server, &rdata->credits, 0);
4631                         cifs_unlock_folios(rdata->mapping,
4632                                            rdata->offset / PAGE_SIZE,
4633                                            (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4634                         /* Fall back to readpage in error/reconnect cases */
4635                         kref_put(&rdata->refcount, cifs_readdata_release);
4636                         break;
4637                 }
4638
4639                 kref_put(&rdata->refcount, cifs_readdata_release);
4640         }
4641
4642         free_xid(xid);
4643 }
4644
4645 /*
4646  * cifs_readpage_worker must be called with the page pinned
4647  */
4648 static int cifs_readpage_worker(struct file *file, struct page *page,
4649         loff_t *poffset)
4650 {
4651         struct inode *inode = file_inode(file);
4652         struct timespec64 atime, mtime;
4653         char *read_data;
4654         int rc;
4655
4656         /* Is the page cached? */
4657         rc = cifs_readpage_from_fscache(inode, page);
4658         if (rc == 0)
4659                 goto read_complete;
4660
4661         read_data = kmap(page);
4662         /* for reads over a certain size we could initiate async read-ahead */
4663
4664         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4665
4666         if (rc < 0)
4667                 goto io_error;
4668         else
4669                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4670
4671         /* we do not want atime to be less than mtime; that broke some apps */
4672         atime = inode_set_atime_to_ts(inode, current_time(inode));
4673         mtime = inode_get_mtime(inode);
4674         if (timespec64_compare(&atime, &mtime) < 0)
4675                 inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4676
4677         if (PAGE_SIZE > rc)
4678                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4679
4680         flush_dcache_page(page);
4681         SetPageUptodate(page);
4682         rc = 0;
4683
4684 io_error:
4685         kunmap(page);
4686
4687 read_complete:
4688         unlock_page(page);
4689         return rc;
4690 }
4691
4692 static int cifs_read_folio(struct file *file, struct folio *folio)
4693 {
4694         struct page *page = &folio->page;
4695         loff_t offset = page_file_offset(page);
4696         int rc = -EACCES;
4697         unsigned int xid;
4698
4699         xid = get_xid();
4700
4701         if (file->private_data == NULL) {
4702                 rc = -EBADF;
4703                 free_xid(xid);
4704                 return rc;
4705         }
4706
4707         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4708                  page, (int)offset, (int)offset);
4709
4710         rc = cifs_readpage_worker(file, page, &offset);
4711
4712         free_xid(xid);
4713         return rc;
4714 }
4715
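     /* Return 1 if any handle open on the inode grants write access. */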
4716 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4717 {
4718         struct cifsFileInfo *open_file;
4719
4720         spin_lock(&cifs_inode->open_file_lock);
4721         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4722                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4723                         spin_unlock(&cifs_inode->open_file_lock);
4724                         return 1;
4725                 }
4726         }
4727         spin_unlock(&cifs_inode->open_file_lock);
4728         return 0;
4729 }
4730
4731 /* We do not want to update the file size from the server for inodes
4732    open for write, to avoid races with writepage extending the file.
4733    In the future we could consider refreshing the inode only on
4734    increases in the file size, but this is tricky to do without
4735    racing with writebehind page caching in the current Linux kernel
4736    design. */
4737 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4738 {
4739         if (!cifsInode)
4740                 return true;
4741
4742         if (is_inode_writable(cifsInode)) {
4743                 /* This inode is open for write at least once */
4744                 struct cifs_sb_info *cifs_sb;
4745
4746                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4747                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4748                         /* since there is no page cache to corrupt on
4749                            direct I/O we can change the size safely */
4750                         return true;
4751                 }
4752
4753                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4754                         return true;
4755
4756                 return false;
4757         } else
4758                 return true;
4759 }
4760
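     /*
      * Prepare a page for a (possibly partial) write.  Where we can, we
      * avoid reading the page in from the server: a full-page write needs
      * no read, and with a read oplock neither does a write to a page
      * that lies beyond the EOF or that covers all the existing data.
      */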
4761 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4762                         loff_t pos, unsigned len,
4763                         struct page **pagep, void **fsdata)
4764 {
4765         int oncethru = 0;
4766         pgoff_t index = pos >> PAGE_SHIFT;
4767         loff_t offset = pos & (PAGE_SIZE - 1);
4768         loff_t page_start = pos & PAGE_MASK;
4769         loff_t i_size;
4770         struct page *page;
4771         int rc = 0;
4772
4773         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4774
4775 start:
4776         page = grab_cache_page_write_begin(mapping, index);
4777         if (!page) {
4778                 rc = -ENOMEM;
4779                 goto out;
4780         }
4781
4782         if (PageUptodate(page))
4783                 goto out;
4784
4785         /*
4786          * If we write a full page it will be up to date, no need to read from
4787          * the server. If the write is short, we'll end up doing a sync write
4788          * instead.
4789          */
4790         if (len == PAGE_SIZE)
4791                 goto out;
4792
4793         /*
4794          * optimize away the read when we have an oplock, and we're not
4795          * expecting to use any of the data we'd be reading in. That
4796          * is, when the page lies beyond the EOF, or straddles the EOF
4797          * and the write will cover all of the existing data.
4798          */
4799         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4800                 i_size = i_size_read(mapping->host);
4801                 if (page_start >= i_size ||
4802                     (offset == 0 && (pos + len) >= i_size)) {
4803                         zero_user_segments(page, 0, offset,
4804                                            offset + len,
4805                                            PAGE_SIZE);
4806                         /*
4807                          * PageChecked means that the parts of the page
4808                          * to which we're not writing are considered up
4809                          * to date. Once the data is copied to the
4810                          * page, it can be set uptodate.
4811                          */
4812                         SetPageChecked(page);
4813                         goto out;
4814                 }
4815         }
4816
4817         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4818                 /*
4819                  * Might as well read a page; it is fast enough. If we get
4820                  * an error, we don't need to return it. cifs_write_end will
4821                  * do a sync write instead since PG_uptodate isn't set.
4822                  */
4823                 cifs_readpage_worker(file, page, &page_start);
4824                 put_page(page);
4825                 oncethru = 1;
4826                 goto start;
4827         } else {
4828                 /* we could try using another file handle if there is one -
4829                    but how would we lock it to prevent a close of that handle
4830                    racing with this read? In any case this will be written
4831                    out by write_end, so it is fine */
4832         }
4833 out:
4834         *pagep = page;
4835         return rc;
4836 }
4837
4838 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4839 {
4840         if (folio_test_private(folio))
4841                 return 0;
4842         if (folio_test_fscache(folio)) {
4843                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4844                         return false;
4845                 folio_wait_fscache(folio);
4846         }
4847         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4848         return true;
4849 }
4850
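     /* Invalidation only needs to wait out any pending write to the cache. */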
4851 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4852                                  size_t length)
4853 {
4854         folio_wait_fscache(folio);
4855 }
4856
4857 static int cifs_launder_folio(struct folio *folio)
4858 {
4859         int rc = 0;
4860         loff_t range_start = folio_pos(folio);
4861         loff_t range_end = range_start + folio_size(folio);
4862         struct writeback_control wbc = {
4863                 .sync_mode = WB_SYNC_ALL,
4864                 .nr_to_write = 0,
4865                 .range_start = range_start,
4866                 .range_end = range_end,
4867         };
4868
4869         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4870
4871         if (folio_clear_dirty_for_io(folio))
4872                 rc = cifs_writepage_locked(&folio->page, &wbc);
4873
4874         folio_wait_fscache(folio);
4875         return rc;
4876 }
4877
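     /*
      * Process an oplock break from the server: downgrade the local
      * caching state, flush (and possibly purge) cached data, push any
      * byte-range locks to the server, then acknowledge the break unless
      * the handle has already been closed.
      */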
4878 void cifs_oplock_break(struct work_struct *work)
4879 {
4880         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4881                                                   oplock_break);
4882         struct inode *inode = d_inode(cfile->dentry);
4883         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4884         struct cifsInodeInfo *cinode = CIFS_I(inode);
4885         struct cifs_tcon *tcon;
4886         struct TCP_Server_Info *server;
4887         struct tcon_link *tlink;
4888         int rc = 0;
4889         bool purge_cache = false, oplock_break_cancelled;
4890         __u64 persistent_fid, volatile_fid;
4891         __u16 net_fid;
4892
4893         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4894                         TASK_UNINTERRUPTIBLE);
4895
4896         tlink = cifs_sb_tlink(cifs_sb);
4897         if (IS_ERR(tlink))
4898                 goto out;
4899         tcon = tlink_tcon(tlink);
4900         server = tcon->ses->server;
4901
4902         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4903                                       cfile->oplock_epoch, &purge_cache);
4904
4905         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4906                                                 cifs_has_mand_locks(cinode)) {
4907                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4908                          inode);
4909                 cinode->oplock = 0;
4910         }
4911
4912         if (inode && S_ISREG(inode->i_mode)) {
4913                 if (CIFS_CACHE_READ(cinode))
4914                         break_lease(inode, O_RDONLY);
4915                 else
4916                         break_lease(inode, O_WRONLY);
4917                 rc = filemap_fdatawrite(inode->i_mapping);
4918                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4919                         rc = filemap_fdatawait(inode->i_mapping);
4920                         mapping_set_error(inode->i_mapping, rc);
4921                         cifs_zap_mapping(inode);
4922                 }
4923                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4924                 if (CIFS_CACHE_WRITE(cinode))
4925                         goto oplock_break_ack;
4926         }
4927
4928         rc = cifs_push_locks(cfile);
4929         if (rc)
4930                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4931
4932 oplock_break_ack:
4933         /*
4934          * When an oplock break is received and there are no active
4935          * file handles, only cached ones, schedule the deferred close
4936          * immediately so that a new open will not use the cached handle.
4937          */
4938
4939         if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4940                 cifs_close_deferred_file(cinode);
4941
4942         persistent_fid = cfile->fid.persistent_fid;
4943         volatile_fid = cfile->fid.volatile_fid;
4944         net_fid = cfile->fid.netfid;
4945         oplock_break_cancelled = cfile->oplock_break_cancelled;
4946
4947         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
4948         /*
4949          * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4950          * an acknowledgment to be sent when the file has already been closed.
4951          */
4952         spin_lock(&cinode->open_file_lock);
4953         /* check list empty since this can race with kill_sb calling tree disconnect */
4954         if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
4955                 spin_unlock(&cinode->open_file_lock);
4956                 rc = server->ops->oplock_response(tcon, persistent_fid,
4957                                                   volatile_fid, net_fid, cinode);
4958                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4959         } else
4960                 spin_unlock(&cinode->open_file_lock);
4961
4962         cifs_put_tlink(tlink);
4963 out:
4964         cifs_done_oplock_break(cinode);
4965 }
4966
4967 /*
4968  * The presence of cifs_direct_io() in the address space ops vector
4969  * allows open() with O_DIRECT, which would have failed otherwise.
4970  *
4971  * In the non-cached mode (mount with cache=none), we shunt off direct
4972  * read and write requests, so this method should never be called.
4973  *
4974  * Direct I/O is not yet supported in the cached mode.
4975  */
4976 static ssize_t
4977 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4978 {
4979         /*
4980          * FIXME
4981          * Eventually need to support direct IO for non forcedirectio mounts
4982          */
4983         return -EINVAL;
4984 }
4985
4986 static int cifs_swap_activate(struct swap_info_struct *sis,
4987                               struct file *swap_file, sector_t *span)
4988 {
4989         struct cifsFileInfo *cfile = swap_file->private_data;
4990         struct inode *inode = swap_file->f_mapping->host;
4991         unsigned long blocks;
4992         long long isize;
4993
4994         cifs_dbg(FYI, "swap activate\n");
4995
4996         if (!swap_file->f_mapping->a_ops->swap_rw)
4997                 /* Cannot support swap */
4998                 return -EINVAL;
4999
5000         spin_lock(&inode->i_lock);
5001         blocks = inode->i_blocks;
5002         isize = inode->i_size;
5003         spin_unlock(&inode->i_lock);
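             /* i_blocks is in 512-byte units; a file with holes cannot back swap */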
5004         if (blocks*512 < isize) {
5005                 pr_warn("swap activate: swapfile has holes\n");
5006                 return -EINVAL;
5007         }
5008         *span = sis->pages;
5009
5010         pr_warn_once("Swap support over SMB3 is experimental\n");
5011
5012         /*
5013          * TODO: consider adding ACL (or documenting how) to prevent other
5014          * users (on this or other systems) from reading it
5015          */
5016
5017
5018         /* TODO: add sk_set_memalloc(inet) or similar */
5019
5020         if (cfile)
5021                 cfile->swapfile = true;
5022         /*
5023          * TODO: Since file already open, we can't open with DENY_ALL here
5024          * but we could add call to grab a byte range lock to prevent others
5025          * from reading or writing the file
5026          */
5027
5028         sis->flags |= SWP_FS_OPS;
5029         return add_swap_extent(sis, 0, sis->max, 0);
5030 }
5031
5032 static void cifs_swap_deactivate(struct file *file)
5033 {
5034         struct cifsFileInfo *cfile = file->private_data;
5035
5036         cifs_dbg(FYI, "swap deactivate\n");
5037
5038         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5039
5040         if (cfile)
5041                 cfile->swapfile = false;
5042
5043         /* do we need to unpin (or unlock) the file */
5044 }
5045
5046 /*
5047  * Mark a page as having been made dirty and thus needing writeback.  We also
5048  * need to pin the cache object to write back to.
5049  */
5050 #ifdef CONFIG_CIFS_FSCACHE
5051 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5052 {
5053         return fscache_dirty_folio(mapping, folio,
5054                                         cifs_inode_cookie(mapping->host));
5055 }
5056 #else
5057 #define cifs_dirty_folio filemap_dirty_folio
5058 #endif
5059
5060 const struct address_space_operations cifs_addr_ops = {
5061         .read_folio = cifs_read_folio,
5062         .readahead = cifs_readahead,
5063         .writepages = cifs_writepages,
5064         .write_begin = cifs_write_begin,
5065         .write_end = cifs_write_end,
5066         .dirty_folio = cifs_dirty_folio,
5067         .release_folio = cifs_release_folio,
5068         .direct_IO = cifs_direct_io,
5069         .invalidate_folio = cifs_invalidate_folio,
5070         .launder_folio = cifs_launder_folio,
5071         .migrate_folio = filemap_migrate_folio,
5072         /*
5073          * TODO: investigate and if useful we could add an is_dirty_writeback
5074          * helper if needed
5075          */
5076         .swap_activate = cifs_swap_activate,
5077         .swap_deactivate = cifs_swap_deactivate,
5078 };
5079
5080 /*
5081  * cifs_readahead requires the server to support a buffer large enough to
5082  * contain the header plus one complete page of data.  Otherwise, we need
5083  * to leave cifs_readahead out of the address space operations.
5084  */
5085 const struct address_space_operations cifs_addr_ops_smallbuf = {
5086         .read_folio = cifs_read_folio,
5087         .writepages = cifs_writepages,
5088         .write_begin = cifs_write_begin,
5089         .write_end = cifs_write_end,
5090         .dirty_folio = cifs_dirty_folio,
5091         .release_folio = cifs_release_folio,
5092         .invalidate_folio = cifs_invalidate_folio,
5093         .launder_folio = cifs_launder_folio,
5094         .migrate_folio = filemap_migrate_folio,
5095 };