// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
                if (xas_retry(&xas, folio))
                        continue;
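                /*
                 * folio_lock() can sleep, so pause the XArray walk and drop
                 * the RCU read lock before taking it, then resume afterwards.
                 */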
                xas_pause(&xas);
                rcu_read_unlock();
                folio_lock(folio);
                folio_clear_dirty_for_io(folio);
                folio_unlock(folio);
                rcu_read_lock();
        }

        rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                if (xas_retry(&xas, folio))
                        continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio->index, end);
                        continue;
                }

                folio_detach_private(folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                if (xas_retry(&xas, folio))
                        continue;
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio->index, end);
                        continue;
                }

                folio_set_error(folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
        struct address_space *mapping = inode->i_mapping;
        struct folio *folio;
        pgoff_t end;

        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

        if (!len)
                return;

        rcu_read_lock();

        end = (start + len - 1) / PAGE_SIZE;
        xas_for_each(&xas, folio, end) {
                if (!folio_test_writeback(folio)) {
                        WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
                                  len, start, folio->index, end);
                        continue;
                }

                filemap_dirty_folio(folio->mapping, folio);
                folio_end_writeback(folio);
        }

        rcu_read_unlock();
}

/*
 * Mark all open files on the tree connection as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->tc_lock);
        if (tcon->need_reconnect)
                tcon->status = TID_NEED_RECON;

        if (tcon->status != TID_NEED_RECON) {
                spin_unlock(&tcon->tc_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->tc_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}

static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can cause
                 * an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr, false);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
                        struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;
        int rdwr_for_fscache = 0;

        if (!server->ops->open)
                return -ENOSYS;

        /* If we're caching, we need to be able to fill in around partial writes. */
        if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
                rdwr_for_fscache = 1;

        desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it as FILE_SUPERSEDE does (which
 *      uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

retry_open:
        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = fid,
        };

        rc = server->ops->open(xid, &oparms, oplock, buf);
        if (rc) {
                if (rc == -EACCES && rdwr_for_fscache == 1) {
                        desired_access = cifs_convert_flags(f_flags, 0);
                        rdwr_for_fscache = 2;
                        goto retry_open;
                }
                return rc;
        }
        if (rdwr_for_fscache == 2)
                cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

        return rc;
}

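/*
 * Check whether any open file on this inode still holds outstanding
 * byte-range (mandatory brlock) locks.
 */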
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

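/*
 * Take @sem for write by polling with down_write_trylock() and a short
 * sleep rather than blocking in down_write().
 */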
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);
void serverclose_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                                       struct tcon_link *tlink, __u32 oplock,
                                       const char *symlink_target)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        if (symlink_target) {
                cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
                if (!cfile->symlink_target) {
                        kfree(fdlocks);
                        kfree(cfile);
                        return NULL;
                }
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_WORK(&cfile->serverclose, serverclose_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
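        /*
         * If a lease break arrived while this open was in flight, the
         * pending_open entry holds the server's updated oplock level;
         * prefer it over the level returned by the open itself.
         */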
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file->symlink_target);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

void serverclose_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, serverclose);

        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);

        struct TCP_Server_Info *server = tcon->ses->server;
        int rc = 0;
        int retries = 0;
        int MAX_RETRIES = 4;

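        /* Retry the server-side close while the server reports busy. */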
        do {
                if (server->ops->close_getattr)
                        rc = server->ops->close_getattr(0, tcon, cifs_file);
                else if (server->ops->close)
                        rc = server->ops->close(0, tcon, &cifs_file->fid);

                if (rc == -EBUSY || rc == -EAGAIN) {
                        retries++;
                        msleep(250);
                }
        } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES));

        if (retries == MAX_RETRIES)
                pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);

        if (cifs_file->offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from the oplock break handler
 * @offload:    if true, defer the final release to a workqueue
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        bool oplock_break_cancelled;
        bool serverclose_offloaded = false;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);

        cifs_file->offload = offload;
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

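        /*
         * Wait for any running oplock break handler to finish, unless we
         * were called from the handler itself (wait_oplock_handler == false),
         * in which case waiting here would deadlock.
         */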
        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;
                int rc = 0;

                xid = get_xid();
                if (server->ops->close_getattr)
                        rc = server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        rc = server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);

                if (rc == -EBUSY || rc == -EAGAIN) {
                        // Server close failed, hence offloading it as an async op
                        queue_work(serverclose_wq, &cifs_file->serverclose);
                        serverclose_offloaded = true;
                }
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        // If the server close was offloaded to the workqueue (on failure),
        // that work will also handle offloading the put. Otherwise we need
        // to handle offloading the put here.
        if (!serverclose_offloaded) {
                if (offload)
                        queue_work(fileinfo_put_wq, &cifs_file->put);
                else
                        cifsFileInfo_put_final(cifs_file);
        }
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        struct cifs_open_info_data data = {};

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

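        /*
         * For O_DIRECT opens under strict caching, switch to the direct-I/O
         * file operations so reads and writes bypass the page cache.
         */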
        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
                                  xid, &data);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Set the mode now; we could not set it earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

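        /*
         * Start using the fscache cookie and, for writable O_DIRECT opens,
         * invalidate any cached data so direct writes are not shadowed by
         * stale cache contents.
         */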
use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (!(file->f_flags & O_DIRECT))
                goto out;
        if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
                goto out;
        cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        cifs_free_open_info(&data);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;
        int rdwr_for_fscache = 0;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here, because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here. We
         * can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        /* If we're caching, we need to be able to fill in around partial writes. */
        if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
                rdwr_for_fscache = 1;

        desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

retry_open:
        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = &cfile->fid,
                .reconnect = true,
        };

        /*
         * Can not refresh the inode by passing in a file_info buf returned by
         * ops->open and then calling get_inode_info with that buf, since the
         * file might have write-behind data that needs to be flushed and the
         * server's version of the file size can be stale. If we knew for sure
         * that the inode was not dirty locally, we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }
        if (rc == -EACCES && rdwr_for_fscache == 1) {
                desired_access = cifs_convert_flags(cfile->f_flags, 0);
                rdwr_for_fscache = 2;
                goto retry_open;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

        if (rdwr_for_fscache == 2)
                cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions) {
                        rc = smb311_posix_get_inode_info(&inode, full_path,
                                                         NULL, inode->i_sb, xid);
                } else if (tcon->unix_ext) {
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                } else {
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
                }
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush data. Since we do not know whether
         * we have data that would invalidate the current end of file on the
         * server, we can not go to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
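                /*
                 * Defer the SMB2 close when we hold an RHW caching lease:
                 * keeping the handle open briefly lets a subsequent reopen
                 * of the same file reuse it instead of another round trip.
                 */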
                if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
                    && cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode_set_mtime_to_ts(inode,
                                                      inode_set_ctime_current(inode));
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work. So, increase the ref count
                                 * to avoid use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles  */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
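                /* Skip locks that do not overlap the requested range. */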
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

1425 /*
1426  * Set the byte-range lock (mandatory style). Returns:
1427  * 1) 0, if we set the lock and don't need to send a request to the server;
1428  * 2) 1, if no locks prevent us but we need to send a request to the server;
1429  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1430  */
1431 static int
1432 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1433                  bool wait)
1434 {
1435         struct cifsLockInfo *conf_lock;
1436         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1437         bool exist;
1438         int rc = 0;
1439
1440 try_again:
1441         exist = false;
1442         cifs_down_write(&cinode->lock_sem);
1443
1444         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1445                                         lock->type, lock->flags, &conf_lock,
1446                                         CIFS_LOCK_OP);
1447         if (!exist && cinode->can_cache_brlcks) {
1448                 list_add_tail(&lock->llist, &cfile->llist->locks);
1449                 up_write(&cinode->lock_sem);
1450                 return rc;
1451         }
1452
1453         if (!exist)
1454                 rc = 1;
1455         else if (!wait)
1456                 rc = -EACCES;
1457         else {
1458                 list_add_tail(&lock->blist, &conf_lock->blist);
1459                 up_write(&cinode->lock_sem);
1460                 rc = wait_event_interruptible(lock->block_q,
1461                                         (lock->blist.prev == &lock->blist) &&
1462                                         (lock->blist.next == &lock->blist));
1463                 if (!rc)
1464                         goto try_again;
1465                 cifs_down_write(&cinode->lock_sem);
1466                 list_del_init(&lock->blist);
1467         }
1468
1469         up_write(&cinode->lock_sem);
1470         return rc;
1471 }
1472
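/*
 * Note on the blocking path above: the waiter queues itself on
 * conf_lock->blist and then sleeps until the entry points back at itself,
 * i.e. an open-coded list_empty(&lock->blist).  cifs_del_lock_waiters()
 * is what empties the list and wakes block_q when the conflicting lock
 * goes away, after which the conflict scan is retried from scratch under
 * lock_sem.
 */
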
1473 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1474 /*
1475  * Check if there is another lock that prevents us from setting the lock
1476  * (posix style). If such a lock exists, update the flock structure with
1477  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1478  * brlocks or leave it unchanged if we can't. Returns 0 if no request to the
1479  * server is needed, or 1 otherwise.
1480  */
1481 static int
1482 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1483 {
1484         int rc = 0;
1485         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1486         unsigned char saved_type = flock->fl_type;
1487
1488         if ((flock->fl_flags & FL_POSIX) == 0)
1489                 return 1;
1490
1491         down_read(&cinode->lock_sem);
1492         posix_test_lock(file, flock);
1493
1494         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1495                 flock->fl_type = saved_type;
1496                 rc = 1;
1497         }
1498
1499         up_read(&cinode->lock_sem);
1500         return rc;
1501 }
1502
1503 /*
1504  * Set the byte-range lock (posix style). Returns:
1505  * 1) <0, if an error occurs while setting the lock;
1506  * 2) 0, if we set the lock and don't need to request to the server;
1507  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1508  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1509  */
1510 static int
1511 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1512 {
1513         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1514         int rc = FILE_LOCK_DEFERRED + 1;
1515
1516         if ((flock->fl_flags & FL_POSIX) == 0)
1517                 return rc;
1518
1519         cifs_down_write(&cinode->lock_sem);
1520         if (!cinode->can_cache_brlcks) {
1521                 up_write(&cinode->lock_sem);
1522                 return rc;
1523         }
1524
1525         rc = posix_lock_file(file, flock, NULL);
1526         up_write(&cinode->lock_sem);
1527         return rc;
1528 }
1529
1530 int
1531 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1532 {
1533         unsigned int xid;
1534         int rc = 0, stored_rc;
1535         struct cifsLockInfo *li, *tmp;
1536         struct cifs_tcon *tcon;
1537         unsigned int num, max_num, max_buf;
1538         LOCKING_ANDX_RANGE *buf, *cur;
1539         static const int types[] = {
1540                 LOCKING_ANDX_LARGE_FILES,
1541                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1542         };
1543         int i;
1544
1545         xid = get_xid();
1546         tcon = tlink_tcon(cfile->tlink);
1547
1548         /*
1549          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1550          * and check it before using.
1551          */
1552         max_buf = tcon->ses->server->maxBuf;
1553         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1554                 free_xid(xid);
1555                 return -EINVAL;
1556         }
1557
1558         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1559                      PAGE_SIZE);
1560         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1561                         PAGE_SIZE);
1562         max_num = (max_buf - sizeof(struct smb_hdr)) /
1563                                                 sizeof(LOCKING_ANDX_RANGE);
1564         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1565         if (!buf) {
1566                 free_xid(xid);
1567                 return -ENOMEM;
1568         }
1569
1570         for (i = 0; i < 2; i++) {
1571                 cur = buf;
1572                 num = 0;
1573                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1574                         if (li->type != types[i])
1575                                 continue;
1576                         cur->Pid = cpu_to_le16(li->pid);
1577                         cur->LengthLow = cpu_to_le32((u32)li->length);
1578                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1579                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1580                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1581                         if (++num == max_num) {
1582                                 stored_rc = cifs_lockv(xid, tcon,
1583                                                        cfile->fid.netfid,
1584                                                        (__u8)li->type, 0, num,
1585                                                        buf);
1586                                 if (stored_rc)
1587                                         rc = stored_rc;
1588                                 cur = buf;
1589                                 num = 0;
1590                         } else
1591                                 cur++;
1592                 }
1593
1594                 if (num) {
1595                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1596                                                (__u8)types[i], 0, num, buf);
1597                         if (stored_rc)
1598                                 rc = stored_rc;
1599                 }
1600         }
1601
1602         kfree(buf);
1603         free_xid(xid);
1604         return rc;
1605 }
1606
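/*
 * Worked example of the batching math above (sizes illustrative, not
 * normative): with the 64-bit LOCKING_ANDX_RANGE format each range costs
 * 20 bytes, so after max_buf is clamped to PAGE_SIZE a 4096-byte buffer
 * minus the SMB header leaves room for roughly a couple of hundred ranges
 * per LOCKING_ANDX request; once num reaches max_num the batch is flushed
 * with cifs_lockv() and the same buffer is reused for the next batch.
 */
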
1607 static __u32
1608 hash_lockowner(fl_owner_t owner)
1609 {
1610         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1611 }
1612 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1613
1614 struct lock_to_push {
1615         struct list_head llist;
1616         __u64 offset;
1617         __u64 length;
1618         __u32 pid;
1619         __u16 netfid;
1620         __u8 type;
1621 };
1622
1623 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1624 static int
1625 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1626 {
1627         struct inode *inode = d_inode(cfile->dentry);
1628         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1629         struct file_lock *flock;
1630         struct file_lock_context *flctx = locks_inode_context(inode);
1631         unsigned int count = 0, i;
1632         int rc = 0, xid, type;
1633         struct list_head locks_to_send, *el;
1634         struct lock_to_push *lck, *tmp;
1635         __u64 length;
1636
1637         xid = get_xid();
1638
1639         if (!flctx)
1640                 goto out;
1641
1642         spin_lock(&flctx->flc_lock);
1643         list_for_each(el, &flctx->flc_posix) {
1644                 count++;
1645         }
1646         spin_unlock(&flctx->flc_lock);
1647
1648         INIT_LIST_HEAD(&locks_to_send);
1649
1650         /*
1651          * Allocating count locks is enough because no FL_POSIX locks can be
1652          * added to the list while we hold cinode->lock_sem, which protects
1653          * the locking operations of this inode.
1654          */
1655         for (i = 0; i < count; i++) {
1656                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1657                 if (!lck) {
1658                         rc = -ENOMEM;
1659                         goto err_out;
1660                 }
1661                 list_add_tail(&lck->llist, &locks_to_send);
1662         }
1663
1664         el = locks_to_send.next;
1665         spin_lock(&flctx->flc_lock);
1666         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1667                 if (el == &locks_to_send) {
1668                         /*
1669                          * The list ended. We don't have enough allocated
1670                          * structures - something is really wrong.
1671                          */
1672                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1673                         break;
1674                 }
1675                 length = cifs_flock_len(flock);
1676                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1677                         type = CIFS_RDLCK;
1678                 else
1679                         type = CIFS_WRLCK;
1680                 lck = list_entry(el, struct lock_to_push, llist);
1681                 lck->pid = hash_lockowner(flock->fl_owner);
1682                 lck->netfid = cfile->fid.netfid;
1683                 lck->length = length;
1684                 lck->type = type;
1685                 lck->offset = flock->fl_start;
1686         }
1687         spin_unlock(&flctx->flc_lock);
1688
1689         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1690                 int stored_rc;
1691
1692                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1693                                              lck->offset, lck->length, NULL,
1694                                              lck->type, 0);
1695                 if (stored_rc)
1696                         rc = stored_rc;
1697                 list_del(&lck->llist);
1698                 kfree(lck);
1699         }
1700
1701 out:
1702         free_xid(xid);
1703         return rc;
1704 err_out:
1705         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1706                 list_del(&lck->llist);
1707                 kfree(lck);
1708         }
1709         goto out;
1710 }
1711 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1712
1713 static int
1714 cifs_push_locks(struct cifsFileInfo *cfile)
1715 {
1716         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1717         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1718         int rc = 0;
1719 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1720         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1721 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1722
1723         /* we are going to update can_cache_brlcks here - need write access */
1724         cifs_down_write(&cinode->lock_sem);
1725         if (!cinode->can_cache_brlcks) {
1726                 up_write(&cinode->lock_sem);
1727                 return rc;
1728         }
1729
1730 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1731         if (cap_unix(tcon->ses) &&
1732             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1733             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1734                 rc = cifs_push_posix_locks(cfile);
1735         else
1736 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1737                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1738
1739         cinode->can_cache_brlcks = false;
1740         up_write(&cinode->lock_sem);
1741         return rc;
1742 }
1743
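/*
 * The push above runs once per cached-lock window: can_cache_brlcks is
 * cleared under lock_sem, so once the oplock/lease that allowed local
 * caching is gone, every byte-range lock recorded locally has been
 * replayed to the server and later lock requests go straight to the wire.
 */
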
1744 static void
1745 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1746                 bool *wait_flag, struct TCP_Server_Info *server)
1747 {
1748         if (flock->fl_flags & FL_POSIX)
1749                 cifs_dbg(FYI, "Posix\n");
1750         if (flock->fl_flags & FL_FLOCK)
1751                 cifs_dbg(FYI, "Flock\n");
1752         if (flock->fl_flags & FL_SLEEP) {
1753                 cifs_dbg(FYI, "Blocking lock\n");
1754                 *wait_flag = true;
1755         }
1756         if (flock->fl_flags & FL_ACCESS)
1757                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1758         if (flock->fl_flags & FL_LEASE)
1759                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1760         if (flock->fl_flags &
1761             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1762                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1763                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1764
1765         *type = server->vals->large_lock_type;
1766         if (flock->fl_type == F_WRLCK) {
1767                 cifs_dbg(FYI, "F_WRLCK\n");
1768                 *type |= server->vals->exclusive_lock_type;
1769                 *lock = 1;
1770         } else if (flock->fl_type == F_UNLCK) {
1771                 cifs_dbg(FYI, "F_UNLCK\n");
1772                 *type |= server->vals->unlock_lock_type;
1773                 *unlock = 1;
1774                 /* Check if unlock includes more than one lock range */
1775         } else if (flock->fl_type == F_RDLCK) {
1776                 cifs_dbg(FYI, "F_RDLCK\n");
1777                 *type |= server->vals->shared_lock_type;
1778                 *lock = 1;
1779         } else if (flock->fl_type == F_EXLCK) {
1780                 cifs_dbg(FYI, "F_EXLCK\n");
1781                 *type |= server->vals->exclusive_lock_type;
1782                 *lock = 1;
1783         } else if (flock->fl_type == F_SHLCK) {
1784                 cifs_dbg(FYI, "F_SHLCK\n");
1785                 *type |= server->vals->shared_lock_type;
1786                 *lock = 1;
1787         } else
1788                 cifs_dbg(FYI, "Unknown type of lock\n");
1789 }
1790
1791 static int
1792 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1793            bool wait_flag, bool posix_lck, unsigned int xid)
1794 {
1795         int rc = 0;
1796         __u64 length = cifs_flock_len(flock);
1797         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1798         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1799         struct TCP_Server_Info *server = tcon->ses->server;
1800 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1801         __u16 netfid = cfile->fid.netfid;
1802
1803         if (posix_lck) {
1804                 int posix_lock_type;
1805
1806                 rc = cifs_posix_lock_test(file, flock);
1807                 if (!rc)
1808                         return rc;
1809
1810                 if (type & server->vals->shared_lock_type)
1811                         posix_lock_type = CIFS_RDLCK;
1812                 else
1813                         posix_lock_type = CIFS_WRLCK;
1814                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1815                                       hash_lockowner(flock->fl_owner),
1816                                       flock->fl_start, length, flock,
1817                                       posix_lock_type, wait_flag);
1818                 return rc;
1819         }
1820 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1821
1822         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1823         if (!rc)
1824                 return rc;
1825
1826         /* BB we could chain these into one lock request BB */
1827         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1828                                     1, 0, false);
1829         if (rc == 0) {
1830                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1831                                             type, 0, 1, false);
1832                 flock->fl_type = F_UNLCK;
1833                 if (rc != 0)
1834                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1835                                  rc);
1836                 return 0;
1837         }
1838
1839         if (type & server->vals->shared_lock_type) {
1840                 flock->fl_type = F_WRLCK;
1841                 return 0;
1842         }
1843
1844         type &= ~server->vals->exclusive_lock_type;
1845
1846         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1847                                     type | server->vals->shared_lock_type,
1848                                     1, 0, false);
1849         if (rc == 0) {
1850                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1851                         type | server->vals->shared_lock_type, 0, 1, false);
1852                 flock->fl_type = F_RDLCK;
1853                 if (rc != 0)
1854                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1855                                  rc);
1856         } else
1857                 flock->fl_type = F_WRLCK;
1858
1859         return 0;
1860 }
1861
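/*
 * The probe sequence above emulates F_GETLK on servers that only offer
 * mandatory locks: take the requested range and, if that succeeds,
 * release it again and report F_UNLCK.  If a shared probe failed, the
 * holder must be exclusive, so report F_WRLCK; if an exclusive probe
 * failed, a second shared probe distinguishes a shared holder (report
 * F_RDLCK) from an exclusive one (report F_WRLCK).
 */
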
1862 void
1863 cifs_move_llist(struct list_head *source, struct list_head *dest)
1864 {
1865         struct list_head *li, *tmp;
1866         list_for_each_safe(li, tmp, source)
1867                 list_move(li, dest);
1868 }
1869
1870 void
1871 cifs_free_llist(struct list_head *llist)
1872 {
1873         struct cifsLockInfo *li, *tmp;
1874         list_for_each_entry_safe(li, tmp, llist, llist) {
1875                 cifs_del_lock_waiters(li);
1876                 list_del(&li->llist);
1877                 kfree(li);
1878         }
1879 }
1880
1881 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1882 int
1883 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1884                   unsigned int xid)
1885 {
1886         int rc = 0, stored_rc;
1887         static const int types[] = {
1888                 LOCKING_ANDX_LARGE_FILES,
1889                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1890         };
1891         unsigned int i;
1892         unsigned int max_num, num, max_buf;
1893         LOCKING_ANDX_RANGE *buf, *cur;
1894         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1895         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1896         struct cifsLockInfo *li, *tmp;
1897         __u64 length = cifs_flock_len(flock);
1898         struct list_head tmp_llist;
1899
1900         INIT_LIST_HEAD(&tmp_llist);
1901
1902         /*
1903          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1904          * and check it before using.
1905          */
1906         max_buf = tcon->ses->server->maxBuf;
1907         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1908                 return -EINVAL;
1909
1910         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1911                      PAGE_SIZE);
1912         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1913                         PAGE_SIZE);
1914         max_num = (max_buf - sizeof(struct smb_hdr)) /
1915                                                 sizeof(LOCKING_ANDX_RANGE);
1916         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1917         if (!buf)
1918                 return -ENOMEM;
1919
1920         cifs_down_write(&cinode->lock_sem);
1921         for (i = 0; i < 2; i++) {
1922                 cur = buf;
1923                 num = 0;
1924                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1925                         if (flock->fl_start > li->offset ||
1926                             (flock->fl_start + length) <
1927                             (li->offset + li->length))
1928                                 continue;
1929                         if (current->tgid != li->pid)
1930                                 continue;
1931                         if (types[i] != li->type)
1932                                 continue;
1933                         if (cinode->can_cache_brlcks) {
1934                                 /*
1935                                  * We can cache brlock requests - simply remove
1936                                  * a lock from the file's list.
1937                                  */
1938                                 list_del(&li->llist);
1939                                 cifs_del_lock_waiters(li);
1940                                 kfree(li);
1941                                 continue;
1942                         }
1943                         cur->Pid = cpu_to_le16(li->pid);
1944                         cur->LengthLow = cpu_to_le32((u32)li->length);
1945                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1946                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1947                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1948                         /*
1949                          * We need to save a lock here to let us add it again to
1950                          * the file's list if the unlock range request fails on
1951                          * the server.
1952                          */
1953                         list_move(&li->llist, &tmp_llist);
1954                         if (++num == max_num) {
1955                                 stored_rc = cifs_lockv(xid, tcon,
1956                                                        cfile->fid.netfid,
1957                                                        li->type, num, 0, buf);
1958                                 if (stored_rc) {
1959                                         /*
1960                                          * We failed on the unlock range
1961                                          * request - add all locks from the tmp
1962                                          * list to the head of the file's list.
1963                                          */
1964                                         cifs_move_llist(&tmp_llist,
1965                                                         &cfile->llist->locks);
1966                                         rc = stored_rc;
1967                                 } else
1968                                         /*
1969                                          * The unlock range request succeed -
1970                                          * The unlock range request succeeded -
1971                                          */
1972                                         cifs_free_llist(&tmp_llist);
1973                                 cur = buf;
1974                                 num = 0;
1975                         } else
1976                                 cur++;
1977                 }
1978                 if (num) {
1979                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1980                                                types[i], num, 0, buf);
1981                         if (stored_rc) {
1982                                 cifs_move_llist(&tmp_llist,
1983                                                 &cfile->llist->locks);
1984                                 rc = stored_rc;
1985                         } else
1986                                 cifs_free_llist(&tmp_llist);
1987                 }
1988         }
1989
1990         up_write(&cinode->lock_sem);
1991         kfree(buf);
1992         return rc;
1993 }
1994 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1995
1996 static int
1997 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1998            bool wait_flag, bool posix_lck, int lock, int unlock,
1999            unsigned int xid)
2000 {
2001         int rc = 0;
2002         __u64 length = cifs_flock_len(flock);
2003         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2004         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2005         struct TCP_Server_Info *server = tcon->ses->server;
2006         struct inode *inode = d_inode(cfile->dentry);
2007
2008 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2009         if (posix_lck) {
2010                 int posix_lock_type;
2011
2012                 rc = cifs_posix_lock_set(file, flock);
2013                 if (rc <= FILE_LOCK_DEFERRED)
2014                         return rc;
2015
2016                 if (type & server->vals->shared_lock_type)
2017                         posix_lock_type = CIFS_RDLCK;
2018                 else
2019                         posix_lock_type = CIFS_WRLCK;
2020
2021                 if (unlock == 1)
2022                         posix_lock_type = CIFS_UNLCK;
2023
2024                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2025                                       hash_lockowner(flock->fl_owner),
2026                                       flock->fl_start, length,
2027                                       NULL, posix_lock_type, wait_flag);
2028                 goto out;
2029         }
2030 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2031         if (lock) {
2032                 struct cifsLockInfo *lock;
2033
2034                 lock = cifs_lock_init(flock->fl_start, length, type,
2035                                       flock->fl_flags);
2036                 if (!lock)
2037                         return -ENOMEM;
2038
2039                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
2040                 if (rc < 0) {
2041                         kfree(lock);
2042                         return rc;
2043                 }
2044                 if (!rc)
2045                         goto out;
2046
2047                 /*
2048                  * Windows 7 server can delay breaking lease from read to None
2049                  * if we set a byte-range lock on a file - break it explicitly
2050                  * before sending the lock to the server to be sure the next
2051                  * read won't conflict with non-overlapping locks due to
2052                  * page reading.
2053                  */
2054                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2055                                         CIFS_CACHE_READ(CIFS_I(inode))) {
2056                         cifs_zap_mapping(inode);
2057                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2058                                  inode);
2059                         CIFS_I(inode)->oplock = 0;
2060                 }
2061
2062                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2063                                             type, 1, 0, wait_flag);
2064                 if (rc) {
2065                         kfree(lock);
2066                         return rc;
2067                 }
2068
2069                 cifs_lock_add(cfile, lock);
2070         } else if (unlock)
2071                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
2072
2073 out:
2074         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
2075                 /*
2076                  * If this is a request to remove all locks because we
2077                  * are closing the file, it doesn't matter if the
2078                  * unlocking failed, as both cifs.ko and the SMB server
2079                  * remove the lock on file close.
2080                  */
2081                 if (rc) {
2082                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2083                         if (!(flock->fl_flags & FL_CLOSE))
2084                                 return rc;
2085                 }
2086                 rc = locks_lock_file_wait(file, flock);
2087         }
2088         return rc;
2089 }
2090
2091 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2092 {
2093         int rc, xid;
2094         int lock = 0, unlock = 0;
2095         bool wait_flag = false;
2096         bool posix_lck = false;
2097         struct cifs_sb_info *cifs_sb;
2098         struct cifs_tcon *tcon;
2099         struct cifsFileInfo *cfile;
2100         __u32 type;
2101
2102         xid = get_xid();
2103
2104         if (!(fl->fl_flags & FL_FLOCK)) {
2105                 rc = -ENOLCK;
2106                 free_xid(xid);
2107                 return rc;
2108         }
2109
2110         cfile = (struct cifsFileInfo *)file->private_data;
2111         tcon = tlink_tcon(cfile->tlink);
2112
2113         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2114                         tcon->ses->server);
2115         cifs_sb = CIFS_FILE_SB(file);
2116
2117         if (cap_unix(tcon->ses) &&
2118             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2119             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2120                 posix_lck = true;
2121
2122         if (!lock && !unlock) {
2123                 /*
2124                  * if neither lock nor unlock is requested then there is
2125                  * nothing to do since we do not know what the operation is
2126                  */
2127                 rc = -EOPNOTSUPP;
2128                 free_xid(xid);
2129                 return rc;
2130         }
2131
2132         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2133                         xid);
2134         free_xid(xid);
2135         return rc;
2138 }
2139
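/*
 * What this entry point serves, seen from userspace.  A minimal sketch
 * against some CIFS mount (the path and file are made up for the
 * example; this is client code, not part of the driver):
 *
 *	#include <fcntl.h>
 *	#include <sys/file.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/mnt/cifs/shared.dat", O_RDWR);
 *	if (fd >= 0 && flock(fd, LOCK_EX) == 0) {	// FL_FLOCK, whole file
 *		// ... exclusive access ...
 *		flock(fd, LOCK_UN);	// or just close(fd), see FL_CLOSE above
 *	}
 *	close(fd);
 */
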
2140 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2141 {
2142         int rc, xid;
2143         int lock = 0, unlock = 0;
2144         bool wait_flag = false;
2145         bool posix_lck = false;
2146         struct cifs_sb_info *cifs_sb;
2147         struct cifs_tcon *tcon;
2148         struct cifsFileInfo *cfile;
2149         __u32 type;
2150
2151         rc = -EACCES;
2152         xid = get_xid();
2153
2154         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
2155                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2156                  (long long)flock->fl_end);
2157
2158         cfile = (struct cifsFileInfo *)file->private_data;
2159         tcon = tlink_tcon(cfile->tlink);
2160
2161         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2162                         tcon->ses->server);
2163         cifs_sb = CIFS_FILE_SB(file);
2164         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2165
2166         if (cap_unix(tcon->ses) &&
2167             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2168             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2169                 posix_lck = true;
2170         /*
2171          * BB add code here to normalize offset and length to account for
2172          * negative length, which we cannot accept over the wire.
2173          */
2174         if (IS_GETLK(cmd)) {
2175                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2176                 free_xid(xid);
2177                 return rc;
2178         }
2179
2180         if (!lock && !unlock) {
2181                 /*
2182                  * if neither lock nor unlock is requested then there is
2183                  * nothing to do since we do not know what the operation is
2184                  */
2185                 free_xid(xid);
2186                 return -EOPNOTSUPP;
2187         }
2188
2189         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2190                         xid);
2191         free_xid(xid);
2192         return rc;
2193 }
2194
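/*
 * And the fcntl() byte-range path handled by cifs_lock(), again as an
 * illustrative userspace sketch (fd and the 4096-byte range are made up
 * for the example):
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 4096,
 *	};
 *	if (fcntl(fd, F_SETLKW, &fl) == 0) {	// FL_SLEEP -> wait_flag
 *		// ... write the locked range ...
 *		fl.l_type = F_UNLCK;
 *		fcntl(fd, F_SETLK, &fl);
 *	}
 */
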
2195 /*
2196  * update the file size (if needed) after a write. Should be called with
2197  * the inode->i_lock held
2198  */
2199 void
2200 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2201                       unsigned int bytes_written)
2202 {
2203         loff_t end_of_write = offset + bytes_written;
2204
2205         if (end_of_write > cifsi->netfs.remote_i_size)
2206                 netfs_resize_file(&cifsi->netfs, end_of_write, true);
2207 }
2208
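/*
 * Caller sketch for the i_lock contract above (this is exactly the
 * pattern used by cifs_write() and cifs_writev_complete() below):
 *
 *	spin_lock(&inode->i_lock);
 *	cifs_update_eof(CIFS_I(inode), offset, bytes_written);
 *	spin_unlock(&inode->i_lock);
 */
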
2209 static ssize_t
2210 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2211            size_t write_size, loff_t *offset)
2212 {
2213         int rc = 0;
2214         unsigned int bytes_written = 0;
2215         unsigned int total_written;
2216         struct cifs_tcon *tcon;
2217         struct TCP_Server_Info *server;
2218         unsigned int xid;
2219         struct dentry *dentry = open_file->dentry;
2220         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2221         struct cifs_io_parms io_parms = {0};
2222
2223         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2224                  write_size, *offset, dentry);
2225
2226         tcon = tlink_tcon(open_file->tlink);
2227         server = tcon->ses->server;
2228
2229         if (!server->ops->sync_write)
2230                 return -ENOSYS;
2231
2232         xid = get_xid();
2233
2234         for (total_written = 0; write_size > total_written;
2235              total_written += bytes_written) {
2236                 rc = -EAGAIN;
2237                 while (rc == -EAGAIN) {
2238                         struct kvec iov[2];
2239                         unsigned int len;
2240
2241                         if (open_file->invalidHandle) {
2242                                 /* we could deadlock if we called
2243                                    filemap_fdatawait from here, so tell
2244                                    reopen_file not to flush data to the
2245                                    server now */
2246                                 rc = cifs_reopen_file(open_file, false);
2247                                 if (rc != 0)
2248                                         break;
2249                         }
2250
2251                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2252                                   (unsigned int)write_size - total_written);
2253                         /* iov[0] is reserved for smb header */
2254                         iov[1].iov_base = (char *)write_data + total_written;
2255                         iov[1].iov_len = len;
2256                         io_parms.pid = pid;
2257                         io_parms.tcon = tcon;
2258                         io_parms.offset = *offset;
2259                         io_parms.length = len;
2260                         rc = server->ops->sync_write(xid, &open_file->fid,
2261                                         &io_parms, &bytes_written, iov, 1);
2262                 }
2263                 if (rc || (bytes_written == 0)) {
2264                         if (total_written)
2265                                 break;
2266                         else {
2267                                 free_xid(xid);
2268                                 return rc;
2269                         }
2270                 } else {
2271                         spin_lock(&d_inode(dentry)->i_lock);
2272                         cifs_update_eof(cifsi, *offset, bytes_written);
2273                         spin_unlock(&d_inode(dentry)->i_lock);
2274                         *offset += bytes_written;
2275                 }
2276         }
2277
2278         cifs_stats_bytes_written(tcon, total_written);
2279
2280         if (total_written > 0) {
2281                 spin_lock(&d_inode(dentry)->i_lock);
2282                 if (*offset > d_inode(dentry)->i_size) {
2283                         i_size_write(d_inode(dentry), *offset);
2284                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2285                 }
2286                 spin_unlock(&d_inode(dentry)->i_lock);
2287         }
2288         mark_inode_dirty_sync(d_inode(dentry));
2289         free_xid(xid);
2290         return total_written;
2291 }
2292
2293 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2294                                         bool fsuid_only)
2295 {
2296         struct cifsFileInfo *open_file = NULL;
2297         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2298
2299         /* only filter by fsuid on multiuser mounts */
2300         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2301                 fsuid_only = false;
2302
2303         spin_lock(&cifs_inode->open_file_lock);
2304         /* we could simply get the first list entry since write-only entries
2305            are always at the end of the list, but since the first entry might
2306            have a close pending, we go through the whole list */
2307         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2308                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2309                         continue;
2310                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2311                         if (!open_file->invalidHandle) {
2312                                 /* found a good file */
2313                                 /* lock it so it will not be closed on us */
2314                                 cifsFileInfo_get(open_file);
2315                                 spin_unlock(&cifs_inode->open_file_lock);
2316                                 return open_file;
2317                         } /* else might as well continue, and look for
2318                              another, or simply have the caller reopen it
2319                              again rather than trying to fix this handle */
2320                 } else /* write only file */
2321                         break; /* write only files are last so must be done */
2322         }
2323         spin_unlock(&cifs_inode->open_file_lock);
2324         return NULL;
2325 }
2326
2327 /* Return -EBADF if no handle is found and general rc otherwise */
2328 int
2329 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2330                        struct cifsFileInfo **ret_file)
2331 {
2332         struct cifsFileInfo *open_file, *inv_file = NULL;
2333         struct cifs_sb_info *cifs_sb;
2334         bool any_available = false;
2335         int rc = -EBADF;
2336         unsigned int refind = 0;
2337         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2338         bool with_delete = flags & FIND_WR_WITH_DELETE;
2339         *ret_file = NULL;
2340
2341         /*
2342          * Having a null inode here (because mapping->host was set to zero by
2343          * the VFS or MM) should not happen, but we had reports of an oops (due
2344          * to it being zero) during stress test cases, so we need to check for it
2345          */
2346
2347         if (cifs_inode == NULL) {
2348                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2349                 dump_stack();
2350                 return rc;
2351         }
2352
2353         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2354
2355         /* only filter by fsuid on multiuser mounts */
2356         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2357                 fsuid_only = false;
2358
2359         spin_lock(&cifs_inode->open_file_lock);
2360 refind_writable:
2361         if (refind > MAX_REOPEN_ATT) {
2362                 spin_unlock(&cifs_inode->open_file_lock);
2363                 return rc;
2364         }
2365         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2366                 if (!any_available && open_file->pid != current->tgid)
2367                         continue;
2368                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2369                         continue;
2370                 if (with_delete && !(open_file->fid.access & DELETE))
2371                         continue;
2372                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2373                         if (!open_file->invalidHandle) {
2374                                 /* found a good writable file */
2375                                 cifsFileInfo_get(open_file);
2376                                 spin_unlock(&cifs_inode->open_file_lock);
2377                                 *ret_file = open_file;
2378                                 return 0;
2379                         } else {
2380                                 if (!inv_file)
2381                                         inv_file = open_file;
2382                         }
2383                 }
2384         }
2385         /* couldn't find usable FH with same pid, try any available */
2386         if (!any_available) {
2387                 any_available = true;
2388                 goto refind_writable;
2389         }
2390
2391         if (inv_file) {
2392                 any_available = false;
2393                 cifsFileInfo_get(inv_file);
2394         }
2395
2396         spin_unlock(&cifs_inode->open_file_lock);
2397
2398         if (inv_file) {
2399                 rc = cifs_reopen_file(inv_file, false);
2400                 if (!rc) {
2401                         *ret_file = inv_file;
2402                         return 0;
2403                 }
2404
2405                 spin_lock(&cifs_inode->open_file_lock);
2406                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2407                 spin_unlock(&cifs_inode->open_file_lock);
2408                 cifsFileInfo_put(inv_file);
2409                 ++refind;
2410                 inv_file = NULL;
2411                 spin_lock(&cifs_inode->open_file_lock);
2412                 goto refind_writable;
2413         }
2414
2415         return rc;
2416 }
2417
2418 struct cifsFileInfo *
2419 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2420 {
2421         struct cifsFileInfo *cfile;
2422         int rc;
2423
2424         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2425         if (rc)
2426                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2427
2428         return cfile;
2429 }
2430
2431 int
2432 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2433                        int flags,
2434                        struct cifsFileInfo **ret_file)
2435 {
2436         struct cifsFileInfo *cfile;
2437         void *page = alloc_dentry_path();
2438
2439         *ret_file = NULL;
2440
2441         spin_lock(&tcon->open_file_lock);
2442         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2443                 struct cifsInodeInfo *cinode;
2444                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2445                 if (IS_ERR(full_path)) {
2446                         spin_unlock(&tcon->open_file_lock);
2447                         free_dentry_path(page);
2448                         return PTR_ERR(full_path);
2449                 }
2450                 if (strcmp(full_path, name))
2451                         continue;
2452
2453                 cinode = CIFS_I(d_inode(cfile->dentry));
2454                 spin_unlock(&tcon->open_file_lock);
2455                 free_dentry_path(page);
2456                 return cifs_get_writable_file(cinode, flags, ret_file);
2457         }
2458
2459         spin_unlock(&tcon->open_file_lock);
2460         free_dentry_path(page);
2461         return -ENOENT;
2462 }
2463
2464 int
2465 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2466                        struct cifsFileInfo **ret_file)
2467 {
2468         struct cifsFileInfo *cfile;
2469         void *page = alloc_dentry_path();
2470
2471         *ret_file = NULL;
2472
2473         spin_lock(&tcon->open_file_lock);
2474         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2475                 struct cifsInodeInfo *cinode;
2476                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2477                 if (IS_ERR(full_path)) {
2478                         spin_unlock(&tcon->open_file_lock);
2479                         free_dentry_path(page);
2480                         return PTR_ERR(full_path);
2481                 }
2482                 if (strcmp(full_path, name))
2483                         continue;
2484
2485                 cinode = CIFS_I(d_inode(cfile->dentry));
2486                 spin_unlock(&tcon->open_file_lock);
2487                 free_dentry_path(page);
2488                 *ret_file = find_readable_file(cinode, 0);
2489                 return *ret_file ? 0 : -ENOENT;
2490         }
2491
2492         spin_unlock(&tcon->open_file_lock);
2493         free_dentry_path(page);
2494         return -ENOENT;
2495 }
2496
2497 void
2498 cifs_writedata_release(struct kref *refcount)
2499 {
2500         struct cifs_writedata *wdata = container_of(refcount,
2501                                         struct cifs_writedata, refcount);
2502 #ifdef CONFIG_CIFS_SMB_DIRECT
2503         if (wdata->mr) {
2504                 smbd_deregister_mr(wdata->mr);
2505                 wdata->mr = NULL;
2506         }
2507 #endif
2508
2509         if (wdata->cfile)
2510                 cifsFileInfo_put(wdata->cfile);
2511
2512         kfree(wdata);
2513 }
2514
2515 /*
2516  * Write failed with a retryable error. Resend the write request. It's also
2517  * possible that the page was redirtied so re-clean the page.
2518  */
2519 static void
2520 cifs_writev_requeue(struct cifs_writedata *wdata)
2521 {
2522         int rc = 0;
2523         struct inode *inode = d_inode(wdata->cfile->dentry);
2524         struct TCP_Server_Info *server;
2525         unsigned int rest_len = wdata->bytes;
2526         loff_t fpos = wdata->offset;
2527
2528         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2529         do {
2530                 struct cifs_writedata *wdata2;
2531                 unsigned int wsize, cur_len;
2532
2533                 wsize = server->ops->wp_retry_size(inode);
2534                 if (wsize < rest_len) {
2535                         if (wsize < PAGE_SIZE) {
2536                                 rc = -EOPNOTSUPP;
2537                                 break;
2538                         }
2539                         cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2540                 } else {
2541                         cur_len = rest_len;
2542                 }
2543
2544                 wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2545                 if (!wdata2) {
2546                         rc = -ENOMEM;
2547                         break;
2548                 }
2549
2550                 wdata2->sync_mode = wdata->sync_mode;
2551                 wdata2->offset  = fpos;
2552                 wdata2->bytes   = cur_len;
2553                 wdata2->iter    = wdata->iter;
2554
2555                 iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2556                 iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2557
2558                 if (iov_iter_is_xarray(&wdata2->iter))
2559                         /* Check for pages having been redirtied and clean
2560                          * them.  We can do this by walking the xarray.  If
2561                          * it's not an xarray, then it's a DIO and we shouldn't
2562                          * be mucking around with the page bits.
2563                          */
2564                         cifs_undirty_folios(inode, fpos, cur_len);
2565
2566                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2567                                             &wdata2->cfile);
2568                 if (!wdata2->cfile) {
2569                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2570                                  rc);
2571                         if (!is_retryable_error(rc))
2572                                 rc = -EBADF;
2573                 } else {
2574                         wdata2->pid = wdata2->cfile->pid;
2575                         rc = server->ops->async_writev(wdata2,
2576                                                        cifs_writedata_release);
2577                 }
2578
2579                 kref_put(&wdata2->refcount, cifs_writedata_release);
2580                 if (rc) {
2581                         if (is_retryable_error(rc))
2582                                 continue;
2583                         fpos += cur_len;
2584                         rest_len -= cur_len;
2585                         break;
2586                 }
2587
2588                 fpos += cur_len;
2589                 rest_len -= cur_len;
2590         } while (rest_len > 0);
2591
2592         /* Clean up remaining pages from the original wdata */
2593         if (iov_iter_is_xarray(&wdata->iter))
2594                 cifs_pages_write_failed(inode, fpos, rest_len);
2595
2596         if (rc != 0 && !is_retryable_error(rc))
2597                 mapping_set_error(inode->i_mapping, rc);
2598         kref_put(&wdata->refcount, cifs_writedata_release);
2599 }
2600
2601 void
2602 cifs_writev_complete(struct work_struct *work)
2603 {
2604         struct cifs_writedata *wdata = container_of(work,
2605                                                 struct cifs_writedata, work);
2606         struct inode *inode = d_inode(wdata->cfile->dentry);
2607
2608         if (wdata->result == 0) {
2609                 spin_lock(&inode->i_lock);
2610                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2611                 spin_unlock(&inode->i_lock);
2612                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2613                                          wdata->bytes);
2614         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2615                 return cifs_writev_requeue(wdata);
2616
2617         if (wdata->result == -EAGAIN)
2618                 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2619         else if (wdata->result < 0)
2620                 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2621         else
2622                 cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2623
2624         if (wdata->result != -EAGAIN)
2625                 mapping_set_error(inode->i_mapping, wdata->result);
2626         kref_put(&wdata->refcount, cifs_writedata_release);
2627 }
2628
2629 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2630 {
2631         struct cifs_writedata *wdata;
2632
2633         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2634         if (wdata != NULL) {
2635                 kref_init(&wdata->refcount);
2636                 INIT_LIST_HEAD(&wdata->list);
2637                 init_completion(&wdata->done);
2638                 INIT_WORK(&wdata->work, complete);
2639         }
2640         return wdata;
2641 }
2642
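/*
 * Usage sketch for the allocator above, mirroring cifs_writev_requeue():
 * the kref starts at 1 and the final kref_put() must name
 * cifs_writedata_release so the cfile reference (and any SMB-direct MR)
 * is dropped together with the structure:
 *
 *	wdata = cifs_writedata_alloc(cifs_writev_complete);
 *	if (!wdata)
 *		return -ENOMEM;
 *	// ... fill in offset/bytes/iter/cfile and submit ...
 *	kref_put(&wdata->refcount, cifs_writedata_release);
 */
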
2643 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2644 {
2645         struct address_space *mapping = page->mapping;
2646         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2647         char *write_data;
2648         int rc = -EFAULT;
2649         int bytes_written = 0;
2650         struct inode *inode;
2651         struct cifsFileInfo *open_file;
2652
2653         if (!mapping || !mapping->host)
2654                 return -EFAULT;
2655
2656         inode = page->mapping->host;
2657
2658         offset += (loff_t)from;
2659         write_data = kmap(page);
2660         write_data += from;
2661
2662         if ((to > PAGE_SIZE) || (from > to)) {
2663                 kunmap(page);
2664                 return -EIO;
2665         }
2666
2667         /* racing with truncate? */
2668         if (offset > mapping->host->i_size) {
2669                 kunmap(page);
2670                 return 0; /* don't care */
2671         }
2672
2673         /* check to make sure that we are not extending the file */
2674         if (mapping->host->i_size - offset < (loff_t)to)
2675                 to = (unsigned)(mapping->host->i_size - offset);
2676
2677         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2678                                     &open_file);
2679         if (!rc) {
2680                 bytes_written = cifs_write(open_file, open_file->pid,
2681                                            write_data, to - from, &offset);
2682                 cifsFileInfo_put(open_file);
2683                 /* Does mm or vfs already set times? */
2684                 simple_inode_init_ts(inode);
2685                 if ((bytes_written > 0) && (offset))
2686                         rc = 0;
2687                 else if (bytes_written < 0)
2688                         rc = bytes_written;
2689                 else
2690                         rc = -EFAULT;
2691         } else {
2692                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2693                 if (!is_retryable_error(rc))
2694                         rc = -EIO;
2695         }
2696
2697         kunmap(page);
2698         return rc;
2699 }
2700
2701 /*
2702  * Extend the region to be written back to include subsequent contiguously
2703  * dirty pages if possible, but don't sleep while doing so.
2704  */
2705 static void cifs_extend_writeback(struct address_space *mapping,
2706                                   struct xa_state *xas,
2707                                   long *_count,
2708                                   loff_t start,
2709                                   int max_pages,
2710                                   loff_t max_len,
2711                                   size_t *_len)
2712 {
2713         struct folio_batch batch;
2714         struct folio *folio;
2715         unsigned int nr_pages;
2716         pgoff_t index = (start + *_len) / PAGE_SIZE;
2717         size_t len;
2718         bool stop = true;
2719         unsigned int i;
2720
2721         folio_batch_init(&batch);
2722
2723         do {
2724                 /* Firstly, we gather up a batch of contiguous dirty pages
2725                  * under the RCU read lock - but we can't clear the dirty flags
2726                  * there if any of those pages are mapped.
2727                  */
2728                 rcu_read_lock();
2729
2730                 xas_for_each(xas, folio, ULONG_MAX) {
2731                         stop = true;
2732                         if (xas_retry(xas, folio))
2733                                 continue;
2734                         if (xa_is_value(folio))
2735                                 break;
2736                         if (folio->index != index) {
2737                                 xas_reset(xas);
2738                                 break;
2739                         }
2740
2741                         if (!folio_try_get_rcu(folio)) {
2742                                 xas_reset(xas);
2743                                 continue;
2744                         }
2745                         nr_pages = folio_nr_pages(folio);
2746                         if (nr_pages > max_pages) {
2747                                 xas_reset(xas);
2748                                 break;
2749                         }
2750
2751                         /* Has the page moved or been split? */
2752                         if (unlikely(folio != xas_reload(xas))) {
2753                                 folio_put(folio);
2754                                 xas_reset(xas);
2755                                 break;
2756                         }
2757
2758                         if (!folio_trylock(folio)) {
2759                                 folio_put(folio);
2760                                 xas_reset(xas);
2761                                 break;
2762                         }
2763                         if (!folio_test_dirty(folio) ||
2764                             folio_test_writeback(folio)) {
2765                                 folio_unlock(folio);
2766                                 folio_put(folio);
2767                                 xas_reset(xas);
2768                                 break;
2769                         }
2770
2771                         max_pages -= nr_pages;
2772                         len = folio_size(folio);
2773                         stop = false;
2774
2775                         index += nr_pages;
2776                         *_count -= nr_pages;
2777                         *_len += len;
2778                         if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2779                                 stop = true;
2780
2781                         if (!folio_batch_add(&batch, folio))
2782                                 break;
2783                         if (stop)
2784                                 break;
2785                 }
2786
2787                 xas_pause(xas);
2788                 rcu_read_unlock();
2789
2790                 /* Now, if we obtained any folios, we can shift them from
2791                  * being dirty to being under writeback.
2792                  */
2793                 if (!folio_batch_count(&batch))
2794                         break;
2795
2796                 for (i = 0; i < folio_batch_count(&batch); i++) {
2797                         folio = batch.folios[i];
2798                         /* The folio should be locked, dirty and not undergoing
2799                          * writeback from the loop above.
2800                          */
2801                         if (!folio_clear_dirty_for_io(folio))
2802                                 WARN_ON(1);
2803                         folio_start_writeback(folio);
2804                         folio_unlock(folio);
2805                 }
2806
2807                 folio_batch_release(&batch);
2808                 cond_resched();
2809         } while (!stop);
2810 }
2811
2812 /*
2813  * Write back the locked page and any subsequent non-locked dirty pages.
2814  */
2815 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2816                                                  struct writeback_control *wbc,
2817                                                  struct xa_state *xas,
2818                                                  struct folio *folio,
2819                                                  unsigned long long start,
2820                                                  unsigned long long end)
2821 {
2822         struct inode *inode = mapping->host;
2823         struct TCP_Server_Info *server;
2824         struct cifs_writedata *wdata;
2825         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2826         struct cifs_credits credits_on_stack;
2827         struct cifs_credits *credits = &credits_on_stack;
2828         struct cifsFileInfo *cfile = NULL;
2829         unsigned long long i_size = i_size_read(inode), max_len;
2830         unsigned int xid, wsize;
2831         size_t len = folio_size(folio);
2832         long count = wbc->nr_to_write;
2833         int rc;
2834
2835         /* The folio should be locked, dirty and not undergoing writeback. */
2836         if (!folio_clear_dirty_for_io(folio))
2837                 WARN_ON_ONCE(1);
2838         folio_start_writeback(folio);
2839
2840         count -= folio_nr_pages(folio);
2841
2842         xid = get_xid();
2843         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2844
2845         rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2846         if (rc) {
2847                 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2848                 goto err_xid;
2849         }
2850
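        /* Reserve SMB credits for this op: wait_mtu_credits() blocks until
         * the server has granted enough credits for a write of up to the
         * mount's wsize and returns the size we are actually allowed to
         * send, along with the credits to charge it against.
         */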
2851         rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2852                                            &wsize, credits);
2853         if (rc != 0)
2854                 goto err_close;
2855
2856         wdata = cifs_writedata_alloc(cifs_writev_complete);
2857         if (!wdata) {
2858                 rc = -ENOMEM;
2859                 goto err_uncredit;
2860         }
2861
2862         wdata->sync_mode = wbc->sync_mode;
2863         wdata->offset = folio_pos(folio);
2864         wdata->pid = cfile->pid;
2865         wdata->credits = credits_on_stack;
2866         wdata->cfile = cfile;
2867         wdata->server = server;
2868         cfile = NULL;
2869
2870         /* Find all consecutive lockable dirty pages that have contiguous
2871          * written regions, stopping when we find a page that is not
2872          * immediately lockable, is not dirty or is missing, or we reach the
2873          * end of the range.
2874          */
2875         if (start < i_size) {
2876                 /* Trim the write to the EOF; the extra data is ignored.  Also
2877                  * put an upper limit on the size of a single write op.
2878                  */
2879                 max_len = wsize;
2880                 max_len = min_t(unsigned long long, max_len, end - start + 1);
2881                 max_len = min_t(unsigned long long, max_len, i_size - start);
2882
2883                 if (len < max_len) {
2884                         int max_pages = INT_MAX;
2885
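                        /* smbdirect (RDMA) transfers register the payload
                         * pages with the NIC, so the page count per request
                         * is capped by the connection's FRMR depth.
                         */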
2886 #ifdef CONFIG_CIFS_SMB_DIRECT
2887                         if (server->smbd_conn)
2888                                 max_pages = server->smbd_conn->max_frmr_depth;
2889 #endif
2890                         max_pages -= folio_nr_pages(folio);
2891
2892                         if (max_pages > 0)
2893                                 cifs_extend_writeback(mapping, xas, &count, start,
2894                                                       max_pages, max_len, &len);
2895                 }
2896         }
2897         len = min_t(unsigned long long, len, i_size - start);
2898
2899         /* We now have a contiguous set of dirty pages, each with writeback
2900          * set; the first page is still locked at this point, but all the rest
2901          * have been unlocked.
2902          */
2903         folio_unlock(folio);
2904         wdata->bytes = len;
2905
2906         if (start < i_size) {
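                /* Point an ITER_XARRAY iterator straight at the pagecache
                 * span so the transport can pull the data directly from
                 * the folios.
                 */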
2907                 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2908                                 start, len);
2909
2910                 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2911                 if (rc)
2912                         goto err_wdata;
2913
2914                 if (wdata->cfile->invalidHandle)
2915                         rc = -EAGAIN;
2916                 else
2917                         rc = wdata->server->ops->async_writev(wdata,
2918                                                               cifs_writedata_release);
2919                 if (rc >= 0) {
2920                         kref_put(&wdata->refcount, cifs_writedata_release);
2921                         goto err_close;
2922                 }
2923         } else {
2924                 /* The dirty region was entirely beyond the EOF. */
2925                 cifs_pages_written_back(inode, start, len);
2926                 rc = 0;
2927         }
2928
2929 err_wdata:
2930         kref_put(&wdata->refcount, cifs_writedata_release);
2931 err_uncredit:
2932         add_credits_and_wake_if(server, credits, 0);
2933 err_close:
2934         if (cfile)
2935                 cifsFileInfo_put(cfile);
2936 err_xid:
2937         free_xid(xid);
2938         if (rc == 0) {
2939                 wbc->nr_to_write = count;
2940                 rc = len;
2941         } else if (is_retryable_error(rc)) {
2942                 cifs_pages_write_redirty(inode, start, len);
2943         } else {
2944                 cifs_pages_write_failed(inode, start, len);
2945                 mapping_set_error(mapping, rc);
2946         }
2947         /* Indication to update ctime and mtime as close is deferred */
2948         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2949         return rc;
2950 }
2951
2952 /*
2953  * write a region of pages back to the server
2954  */
2955 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2956                                      struct writeback_control *wbc,
2957                                      struct xa_state *xas,
2958                                      unsigned long long *_start,
2959                                      unsigned long long end)
2960 {
2961         struct folio *folio;
2962         unsigned long long start = *_start;
2963         ssize_t ret;
2964         int skips = 0;
2965
2966 search_again:
2967         /* Find the first dirty page. */
2968         rcu_read_lock();
2969
2970         for (;;) {
2971                 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2972                 if (xas_retry(xas, folio) || xa_is_value(folio))
2973                         continue;
2974                 if (!folio)
2975                         break;
2976
2977                 if (!folio_try_get_rcu(folio)) {
2978                         xas_reset(xas);
2979                         continue;
2980                 }
2981
2982                 if (unlikely(folio != xas_reload(xas))) {
2983                         folio_put(folio);
2984                         xas_reset(xas);
2985                         continue;
2986                 }
2987
2988                 xas_pause(xas);
2989                 break;
2990         }
2991         rcu_read_unlock();
2992         if (!folio)
2993                 return 0;
2994
2995         start = folio_pos(folio); /* May regress with THPs */
2996
2997         /* At this point we hold neither the i_pages lock nor the page lock:
2998          * the page may be truncated or invalidated (changing page->mapping to
2999          * NULL), or even swizzled back from swapper_space to tmpfs file
3000          * mapping
3001          */
3002 lock_again:
3003         if (wbc->sync_mode != WB_SYNC_NONE) {
3004                 ret = folio_lock_killable(folio);
3005                 if (ret < 0)
3006                         return ret;
3007         } else {
3008                 if (!folio_trylock(folio))
3009                         goto search_again;
3010         }
3011
3012         if (folio->mapping != mapping ||
3013             !folio_test_dirty(folio)) {
3014                 start += folio_size(folio);
3015                 folio_unlock(folio);
3016                 goto search_again;
3017         }
3018
3019         if (folio_test_writeback(folio) ||
3020             folio_test_fscache(folio)) {
3021                 folio_unlock(folio);
3022                 if (wbc->sync_mode != WB_SYNC_NONE) {
3023                         folio_wait_writeback(folio);
3024 #ifdef CONFIG_CIFS_FSCACHE
3025                         folio_wait_fscache(folio);
3026 #endif
3027                         goto lock_again;
3028                 }
3029
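                /* Background writeback must not wait on a busy folio: step
                 * past it, but give up after a handful of skips or when we
                 * ought to reschedule.
                 */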
3030                 start += folio_size(folio);
3031                 if (wbc->sync_mode == WB_SYNC_NONE) {
3032                         if (skips >= 5 || need_resched()) {
3033                                 ret = 0;
3034                                 goto out;
3035                         }
3036                         skips++;
3037                 }
3038                 goto search_again;
3039         }
3040
3041         ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3042 out:
3043         if (ret > 0)
3044                 *_start = start + ret;
3045         return ret;
3046 }
3047
3048 /*
3049  * Write a region of pages back to the server
3050  */
3051 static int cifs_writepages_region(struct address_space *mapping,
3052                                   struct writeback_control *wbc,
3053                                   unsigned long long *_start,
3054                                   unsigned long long end)
3055 {
3056         ssize_t ret;
3057
3058         XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3059
3060         do {
3061                 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3062                 if (ret > 0 && wbc->nr_to_write > 0)
3063                         cond_resched();
3064         } while (ret > 0 && wbc->nr_to_write > 0);
3065
3066         return ret > 0 ? 0 : ret;
3067 }
3068
3069 /*
3070  * Write some of the pending data back to the server
3071  */
3072 static int cifs_writepages(struct address_space *mapping,
3073                            struct writeback_control *wbc)
3074 {
3075         loff_t start, end;
3076         int ret;
3077
3078         /* We have to be careful as we can end up racing with setattr()
3079          * truncating the pagecache since the caller doesn't take a lock here
3080          * to prevent it.
3081          */
3082
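        /* For cyclic writeback, resume from where the previous pass left
         * off (writeback_index) and run to the end of the file, then wrap
         * around and sweep the region before the old starting point.
         */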
3083         if (wbc->range_cyclic && mapping->writeback_index) {
3084                 start = mapping->writeback_index * PAGE_SIZE;
3085                 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3086                 if (ret < 0)
3087                         goto out;
3088
3089                 if (wbc->nr_to_write <= 0) {
3090                         mapping->writeback_index = start / PAGE_SIZE;
3091                         goto out;
3092                 }
3093
3094                 start = 0;
3095                 end = mapping->writeback_index * PAGE_SIZE;
3096                 mapping->writeback_index = 0;
3097                 ret = cifs_writepages_region(mapping, wbc, &start, end);
3098                 if (ret == 0)
3099                         mapping->writeback_index = start / PAGE_SIZE;
3100         } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3101                 start = 0;
3102                 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3103                 if (wbc->nr_to_write > 0 && ret == 0)
3104                         mapping->writeback_index = start / PAGE_SIZE;
3105         } else {
3106                 start = wbc->range_start;
3107                 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3108         }
3109
3110 out:
3111         return ret;
3112 }
3113
3114 static int
3115 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3116 {
3117         int rc;
3118         unsigned int xid;
3119
3120         xid = get_xid();
3121 /* BB add check for wbc flags */
3122         get_page(page);
3123         if (!PageUptodate(page))
3124                 cifs_dbg(FYI, "ppw - page not up to date\n");
3125
3126         /*
3127          * Set the "writeback" flag, and clear "dirty" in the radix tree.
3128          *
3129          * A writepage() implementation always needs to do either this,
3130          * or re-dirty the page with "redirty_page_for_writepage()" in
3131          * the case of a failure.
3132          *
3133          * Just unlocking the page would leave the radix tree tag bits
3134          * out of sync with the actual state of the page.
3135          */
3136         set_page_writeback(page);
3137 retry_write:
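        /* Only data-integrity writeback (WB_SYNC_ALL) spins on -EAGAIN
         * here; for other modes a retryable error just redirties the page
         * so that a later pass can pick it up.
         */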
3138         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3139         if (is_retryable_error(rc)) {
3140                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3141                         goto retry_write;
3142                 redirty_page_for_writepage(wbc, page);
3143         } else if (rc != 0) {
3144                 SetPageError(page);
3145                 mapping_set_error(page->mapping, rc);
3146         } else {
3147                 SetPageUptodate(page);
3148         }
3149         end_page_writeback(page);
3150         put_page(page);
3151         free_xid(xid);
3152         return rc;
3153 }
3154
3155 static int cifs_write_end(struct file *file, struct address_space *mapping,
3156                         loff_t pos, unsigned len, unsigned copied,
3157                         struct page *page, void *fsdata)
3158 {
3159         int rc;
3160         struct inode *inode = mapping->host;
3161         struct cifsFileInfo *cfile = file->private_data;
3162         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3163         struct folio *folio = page_folio(page);
3164         __u32 pid;
3165
3166         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3167                 pid = cfile->pid;
3168         else
3169                 pid = current->tgid;
3170
3171         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3172                  page, pos, copied);
3173
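        /* A folio marked "checked" had the parts we weren't going to write
         * zeroed by write_begin instead of being read from the server, so
         * if the copy landed in full the whole folio is now valid and can
         * be flagged uptodate.
         */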
3174         if (folio_test_checked(folio)) {
3175                 if (copied == len)
3176                         folio_mark_uptodate(folio);
3177                 folio_clear_checked(folio);
3178         } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3179                 folio_mark_uptodate(folio);
3180
3181         if (!folio_test_uptodate(folio)) {
3182                 char *page_data;
3183                 unsigned offset = pos & (PAGE_SIZE - 1);
3184                 unsigned int xid;
3185
3186                 xid = get_xid();
3187                 /* this is probably better than calling partialpage_write
3188                    directly, since the file handle is already known here
3189                    and we might as well make use of it */
3190                 /* BB check if anything else is missing from ppw,
3191                    such as updating the last write time */
3192                 page_data = kmap(page);
3193                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3194                 /* if (rc < 0) should we set writebehind rc? */
3195                 kunmap(page);
3196
3197                 free_xid(xid);
3198         } else {
3199                 rc = copied;
3200                 pos += copied;
3201                 set_page_dirty(page);
3202         }
3203
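        /* If the copy extended the file, advance the cached EOF and
         * recompute i_blocks in 512-byte units, rounding up.
         */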
3204         if (rc > 0) {
3205                 spin_lock(&inode->i_lock);
3206                 if (pos > inode->i_size) {
3207                         i_size_write(inode, pos);
3208                         inode->i_blocks = (512 - 1 + pos) >> 9;
3209                 }
3210                 spin_unlock(&inode->i_lock);
3211         }
3212
3213         unlock_page(page);
3214         put_page(page);
3215         /* Indication to update ctime and mtime as close is deferred */
3216         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3217
3218         return rc;
3219 }
3220
3221 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3222                       int datasync)
3223 {
3224         unsigned int xid;
3225         int rc = 0;
3226         struct cifs_tcon *tcon;
3227         struct TCP_Server_Info *server;
3228         struct cifsFileInfo *smbfile = file->private_data;
3229         struct inode *inode = file_inode(file);
3230         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3231
3232         rc = file_write_and_wait_range(file, start, end);
3233         if (rc) {
3234                 trace_cifs_fsync_err(inode->i_ino, rc);
3235                 return rc;
3236         }
3237
3238         xid = get_xid();
3239
3240         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3241                  file, datasync);
3242
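        /* Without a read lease/oplock we cannot assume the pagecache still
         * matches the server, so drop it; subsequent reads will refetch.
         */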
3243         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3244                 rc = cifs_zap_mapping(inode);
3245                 if (rc) {
3246                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3247                         rc = 0; /* don't care about it in fsync */
3248                 }
3249         }
3250
3251         tcon = tlink_tcon(smbfile->tlink);
3252         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3253                 server = tcon->ses->server;
3254                 if (server->ops->flush == NULL) {
3255                         rc = -ENOSYS;
3256                         goto strict_fsync_exit;
3257                 }
3258
3259                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3260                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3261                         if (smbfile) {
3262                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3263                                 cifsFileInfo_put(smbfile);
3264                         } else
3265                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3266                 } else
3267                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3268         }
3269
3270 strict_fsync_exit:
3271         free_xid(xid);
3272         return rc;
3273 }
3274
3275 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3276 {
3277         unsigned int xid;
3278         int rc = 0;
3279         struct cifs_tcon *tcon;
3280         struct TCP_Server_Info *server;
3281         struct cifsFileInfo *smbfile = file->private_data;
3282         struct inode *inode = file_inode(file);
3283         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3284
3285         rc = file_write_and_wait_range(file, start, end);
3286         if (rc) {
3287                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3288                 return rc;
3289         }
3290
3291         xid = get_xid();
3292
3293         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3294                  file, datasync);
3295
3296         tcon = tlink_tcon(smbfile->tlink);
3297         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3298                 server = tcon->ses->server;
3299                 if (server->ops->flush == NULL) {
3300                         rc = -ENOSYS;
3301                         goto fsync_exit;
3302                 }
3303
3304                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3305                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3306                         if (smbfile) {
3307                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3308                                 cifsFileInfo_put(smbfile);
3309                         } else
3310                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3311                 } else
3312                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3313         }
3314
3315 fsync_exit:
3316         free_xid(xid);
3317         return rc;
3318 }
3319
3320 /*
3321  * As the file closes, flush all cached write data for this inode,
3322  * checking for write-behind errors.
3323  */
3324 int cifs_flush(struct file *file, fl_owner_t id)
3325 {
3326         struct inode *inode = file_inode(file);
3327         int rc = 0;
3328
3329         if (file->f_mode & FMODE_WRITE)
3330                 rc = filemap_write_and_wait(inode->i_mapping);
3331
3332         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3333         if (rc) {
3334                 /* get more nuanced writeback errors */
3335                 rc = filemap_check_wb_err(file->f_mapping, 0);
3336                 trace_cifs_flush_err(inode->i_ino, rc);
3337         }
3338         return rc;
3339 }
3340
3341 static void
3342 cifs_uncached_writedata_release(struct kref *refcount)
3343 {
3344         struct cifs_writedata *wdata = container_of(refcount,
3345                                         struct cifs_writedata, refcount);
3346
3347         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3348         cifs_writedata_release(refcount);
3349 }
3350
3351 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3352
3353 static void
3354 cifs_uncached_writev_complete(struct work_struct *work)
3355 {
3356         struct cifs_writedata *wdata = container_of(work,
3357                                         struct cifs_writedata, work);
3358         struct inode *inode = d_inode(wdata->cfile->dentry);
3359         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3360
3361         spin_lock(&inode->i_lock);
3362         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3363         if (cifsi->netfs.remote_i_size > inode->i_size)
3364                 i_size_write(inode, cifsi->netfs.remote_i_size);
3365         spin_unlock(&inode->i_lock);
3366
3367         complete(&wdata->done);
3368         collect_uncached_write_data(wdata->ctx);
3369         /* the call below can possibly free the last ref to the aio ctx */
3370         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3371 }
3372
3373 static int
3374 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3375         struct cifs_aio_ctx *ctx)
3376 {
3377         unsigned int wsize;
3378         struct cifs_credits credits;
3379         int rc;
3380         struct TCP_Server_Info *server = wdata->server;
3381
3382         do {
3383                 if (wdata->cfile->invalidHandle) {
3384                         rc = cifs_reopen_file(wdata->cfile, false);
3385                         if (rc == -EAGAIN)
3386                                 continue;
3387                         else if (rc)
3388                                 break;
3389                 }
3390
3392                 /*
3393                  * Wait for credits to resend this wdata.
3394                  * Note: we attempt to resend the whole wdata rather than
3395                  * in segments.
3396                  */
3397                 do {
3398                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3399                                                 &wsize, &credits);
3400                         if (rc)
3401                                 goto fail;
3402
3403                         if (wsize < wdata->bytes) {
3404                                 add_credits_and_wake_if(server, &credits, 0);
3405                                 msleep(1000);
3406                         }
3407                 } while (wsize < wdata->bytes);
3408                 wdata->credits = credits;
3409
3410                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3411
3412                 if (!rc) {
3413                         if (wdata->cfile->invalidHandle)
3414                                 rc = -EAGAIN;
3415                         else {
3416                                 wdata->replay = true;
3417 #ifdef CONFIG_CIFS_SMB_DIRECT
3418                                 if (wdata->mr) {
3419                                         wdata->mr->need_invalidate = true;
3420                                         smbd_deregister_mr(wdata->mr);
3421                                         wdata->mr = NULL;
3422                                 }
3423 #endif
3424                                 rc = server->ops->async_writev(wdata,
3425                                         cifs_uncached_writedata_release);
3426                         }
3427                 }
3428
3429                 /* If the write was successfully sent, we are done */
3430                 if (!rc) {
3431                         list_add_tail(&wdata->list, wdata_list);
3432                         return 0;
3433                 }
3434
3435                 /* Roll back credits and retry if needed */
3436                 add_credits_and_wake_if(server, &wdata->credits, 0);
3437         } while (rc == -EAGAIN);
3438
3439 fail:
3440         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3441         return rc;
3442 }
3443
3444 /*
3445  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3446  * size and maximum number of segments.
3447  */
3448 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3449                                      size_t max_segs, unsigned int *_nsegs)
3450 {
3451         const struct bio_vec *bvecs = iter->bvec;
3452         unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3453         size_t len, span = 0, n = iter->count;
3454         size_t skip = iter->iov_offset;
3455
3456         if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3457                 return 0;
3458
3459         while (n && ix < nbv && skip) {
3460                 len = bvecs[ix].bv_len;
3461                 if (skip < len)
3462                         break;
3463                 skip -= len;
3464                 n -= len;
3465                 ix++;
3466         }
3467
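        /* Now accumulate whole or partial bvecs until the data runs out or
         * we hit max_size or max_segs.  As a purely illustrative example:
         * three 4KiB bvecs with max_size = 6KiB and max_segs = 8 would
         * yield span = 6KiB over nsegs = 2 segments.
         */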
3468         while (n && ix < nbv) {
3469                 len = min3(n, bvecs[ix].bv_len - skip, max_size);
3470                 span += len;
3471                 max_size -= len;
3472                 nsegs++;
3473                 ix++;
3474                 if (max_size == 0 || nsegs >= max_segs)
3475                         break;
3476                 skip = 0;
3477                 n -= len;
3478         }
3479
3480         *_nsegs = nsegs;
3481         return span;
3482 }
3483
3484 static int
3485 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3486                      struct cifsFileInfo *open_file,
3487                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3488                      struct cifs_aio_ctx *ctx)
3489 {
3490         int rc = 0;
3491         size_t cur_len, max_len;
3492         struct cifs_writedata *wdata;
3493         pid_t pid;
3494         struct TCP_Server_Info *server;
3495         unsigned int xid, max_segs = INT_MAX;
3496
3497         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3498                 pid = open_file->pid;
3499         else
3500                 pid = current->tgid;
3501
3502         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3503         xid = get_xid();
3504
3505 #ifdef CONFIG_CIFS_SMB_DIRECT
3506         if (server->smbd_conn)
3507                 max_segs = server->smbd_conn->max_frmr_depth;
3508 #endif
3509
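        /* Each pass of the loop below reserves credits, carves the next
         * chunk off the source iterator (bounded by both the granted wsize
         * and the transport's segment limit) and dispatches it as an
         * asynchronous write.
         */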
3510         do {
3511                 struct cifs_credits credits_on_stack;
3512                 struct cifs_credits *credits = &credits_on_stack;
3513                 unsigned int wsize, nsegs = 0;
3514
3515                 if (signal_pending(current)) {
3516                         rc = -EINTR;
3517                         break;
3518                 }
3519
3520                 if (open_file->invalidHandle) {
3521                         rc = cifs_reopen_file(open_file, false);
3522                         if (rc == -EAGAIN)
3523                                 continue;
3524                         else if (rc)
3525                                 break;
3526                 }
3527
3528                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3529                                                    &wsize, credits);
3530                 if (rc)
3531                         break;
3532
3533                 max_len = min_t(const size_t, len, wsize);
3534                 if (!max_len) {
3535                         rc = -EAGAIN;
3536                         add_credits_and_wake_if(server, credits, 0);
3537                         break;
3538                 }
3539
3540                 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3541                 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3542                          cur_len, max_len, nsegs, from->nr_segs, max_segs);
3543                 if (cur_len == 0) {
3544                         rc = -EIO;
3545                         add_credits_and_wake_if(server, credits, 0);
3546                         break;
3547                 }
3548
3549                 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3550                 if (!wdata) {
3551                         rc = -ENOMEM;
3552                         add_credits_and_wake_if(server, credits, 0);
3553                         break;
3554                 }
3555
3556                 wdata->sync_mode = WB_SYNC_ALL;
3557                 wdata->offset   = (__u64)fpos;
3558                 wdata->cfile    = cifsFileInfo_get(open_file);
3559                 wdata->server   = server;
3560                 wdata->pid      = pid;
3561                 wdata->bytes    = cur_len;
3562                 wdata->credits  = credits_on_stack;
3563                 wdata->iter     = *from;
3564                 wdata->ctx      = ctx;
3565                 kref_get(&ctx->refcount);
3566
3567                 iov_iter_truncate(&wdata->iter, cur_len);
3568
3569                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3570
3571                 if (!rc) {
3572                         if (wdata->cfile->invalidHandle)
3573                                 rc = -EAGAIN;
3574                         else
3575                                 rc = server->ops->async_writev(wdata,
3576                                         cifs_uncached_writedata_release);
3577                 }
3578
3579                 if (rc) {
3580                         add_credits_and_wake_if(server, &wdata->credits, 0);
3581                         kref_put(&wdata->refcount,
3582                                  cifs_uncached_writedata_release);
3583                         if (rc == -EAGAIN)
3584                                 continue;
3585                         break;
3586                 }
3587
3588                 list_add_tail(&wdata->list, wdata_list);
3589                 iov_iter_advance(from, cur_len);
3590                 fpos += cur_len;
3591                 len -= cur_len;
3592         } while (len > 0);
3593
3594         free_xid(xid);
3595         return rc;
3596 }
3597
3598 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3599 {
3600         struct cifs_writedata *wdata, *tmp;
3601         struct cifs_tcon *tcon;
3602         struct cifs_sb_info *cifs_sb;
3603         struct dentry *dentry = ctx->cfile->dentry;
3604         ssize_t rc;
3605
3606         tcon = tlink_tcon(ctx->cfile->tlink);
3607         cifs_sb = CIFS_SB(dentry->d_sb);
3608
3609         mutex_lock(&ctx->aio_mutex);
3610
3611         if (list_empty(&ctx->list)) {
3612                 mutex_unlock(&ctx->aio_mutex);
3613                 return;
3614         }
3615
3616         rc = ctx->rc;
3617         /*
3618          * Wait for and collect replies for any successful sends in order of
3619          * increasing offset. Once an error is hit, return without waiting
3620          * for any more replies.
3621          */
3622 restart_loop:
3623         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3624                 if (!rc) {
3625                         if (!try_wait_for_completion(&wdata->done)) {
3626                                 mutex_unlock(&ctx->aio_mutex);
3627                                 return;
3628                         }
3629
3630                         if (wdata->result)
3631                                 rc = wdata->result;
3632                         else
3633                                 ctx->total_len += wdata->bytes;
3634
3635                         /* resend call if it's a retryable error */
3636                         if (rc == -EAGAIN) {
3637                                 struct list_head tmp_list;
3638                                 struct iov_iter tmp_from = ctx->iter;
3639
3640                                 INIT_LIST_HEAD(&tmp_list);
3641                                 list_del_init(&wdata->list);
3642
3643                                 if (ctx->direct_io)
3644                                         rc = cifs_resend_wdata(
3645                                                 wdata, &tmp_list, ctx);
3646                                 else {
3647                                         iov_iter_advance(&tmp_from,
3648                                                  wdata->offset - ctx->pos);
3649
3650                                         rc = cifs_write_from_iter(wdata->offset,
3651                                                 wdata->bytes, &tmp_from,
3652                                                 ctx->cfile, cifs_sb, &tmp_list,
3653                                                 ctx);
3654
3655                                         kref_put(&wdata->refcount,
3656                                                 cifs_uncached_writedata_release);
3657                                 }
3658
3659                                 list_splice(&tmp_list, &ctx->list);
3660                                 goto restart_loop;
3661                         }
3662                 }
3663                 list_del_init(&wdata->list);
3664                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3665         }
3666
3667         cifs_stats_bytes_written(tcon, ctx->total_len);
3668         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3669
3670         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3671
3672         mutex_unlock(&ctx->aio_mutex);
3673
3674         if (ctx->iocb && ctx->iocb->ki_complete)
3675                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3676         else
3677                 complete(&ctx->done);
3678 }
3679
3680 static ssize_t __cifs_writev(
3681         struct kiocb *iocb, struct iov_iter *from, bool direct)
3682 {
3683         struct file *file = iocb->ki_filp;
3684         ssize_t total_written = 0;
3685         struct cifsFileInfo *cfile;
3686         struct cifs_tcon *tcon;
3687         struct cifs_sb_info *cifs_sb;
3688         struct cifs_aio_ctx *ctx;
3689         int rc;
3690
3691         rc = generic_write_checks(iocb, from);
3692         if (rc <= 0)
3693                 return rc;
3694
3695         cifs_sb = CIFS_FILE_SB(file);
3696         cfile = file->private_data;
3697         tcon = tlink_tcon(cfile->tlink);
3698
3699         if (!tcon->ses->server->ops->async_writev)
3700                 return -ENOSYS;
3701
3702         ctx = cifs_aio_ctx_alloc();
3703         if (!ctx)
3704                 return -ENOMEM;
3705
3706         ctx->cfile = cifsFileInfo_get(cfile);
3707
3708         if (!is_sync_kiocb(iocb))
3709                 ctx->iocb = iocb;
3710
3711         ctx->pos = iocb->ki_pos;
3712         ctx->direct_io = direct;
3713         ctx->nr_pinned_pages = 0;
3714
3715         if (user_backed_iter(from)) {
3716                 /*
3717                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3718                  * they contain references to the calling process's virtual
3719                  * memory layout which won't be available in an async worker
3720                  * thread.  This also takes a pin on every folio involved.
3721                  */
3722                 rc = netfs_extract_user_iter(from, iov_iter_count(from),
3723                                              &ctx->iter, 0);
3724                 if (rc < 0) {
3725                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3726                         return rc;
3727                 }
3728
3729                 ctx->nr_pinned_pages = rc;
3730                 ctx->bv = (void *)ctx->iter.bvec;
3731                 ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3732         } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3733                    !is_sync_kiocb(iocb)) {
3734                 /*
3735                  * If the op is asynchronous, we need to copy the list attached
3736                  * to a BVEC/KVEC-type iterator, but we assume that the storage
3737                  * will be pinned by the caller; in any case, we may or may not
3738                  * be able to pin the pages, so we don't try.
3739                  */
3740                 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3741                 if (!ctx->bv) {
3742                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3743                         return -ENOMEM;
3744                 }
3745         } else {
3746                 /*
3747                  * Otherwise, we just pass the iterator down as-is and rely on
3748                  * the caller to make sure the pages referred to by the
3749                  * iterator don't evaporate.
3750                  */
3751                 ctx->iter = *from;
3752         }
3753
3754         ctx->len = iov_iter_count(&ctx->iter);
3755
3756         /* grab a lock here because the write response handlers can access ctx */
3757         mutex_lock(&ctx->aio_mutex);
3758
3759         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3760                                   cfile, cifs_sb, &ctx->list, ctx);
3761
3762         /*
3763          * If at least one write was successfully sent, then discard any rc
3764          * value from the later writes. If the other write succeeds, then
3765          * value from the later writes. If those writes succeed, then we'll
3766          * end up returning whatever was written. If they fail, then we'll
3767          * get a new rc value from them.
3768         if (!list_empty(&ctx->list))
3769                 rc = 0;
3770
3771         mutex_unlock(&ctx->aio_mutex);
3772
3773         if (rc) {
3774                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3775                 return rc;
3776         }
3777
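        /* For an async kiocb each outstanding write holds its own ref on
         * ctx and completion is signalled through ->ki_complete(), so
         * report the I/O as queued rather than waiting here.
         */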
3778         if (!is_sync_kiocb(iocb)) {
3779                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3780                 return -EIOCBQUEUED;
3781         }
3782
3783         rc = wait_for_completion_killable(&ctx->done);
3784         if (rc) {
3785                 mutex_lock(&ctx->aio_mutex);
3786                 ctx->rc = rc = -EINTR;
3787                 total_written = ctx->total_len;
3788                 mutex_unlock(&ctx->aio_mutex);
3789         } else {
3790                 rc = ctx->rc;
3791                 total_written = ctx->total_len;
3792         }
3793
3794         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3795
3796         if (unlikely(!total_written))
3797                 return rc;
3798
3799         iocb->ki_pos += total_written;
3800         return total_written;
3801 }
3802
3803 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3804 {
3805         struct file *file = iocb->ki_filp;
3806
3807         cifs_revalidate_mapping(file->f_inode);
3808         return __cifs_writev(iocb, from, true);
3809 }
3810
3811 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3812 {
3813         return __cifs_writev(iocb, from, false);
3814 }
3815
3816 static ssize_t
3817 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3818 {
3819         struct file *file = iocb->ki_filp;
3820         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3821         struct inode *inode = file->f_mapping->host;
3822         struct cifsInodeInfo *cinode = CIFS_I(inode);
3823         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3824         ssize_t rc;
3825
3826         inode_lock(inode);
3827         /*
3828          * We need to hold the sem to be sure nobody modifies lock list
3829          * with a brlock that prevents writing.
3830          */
3831         down_read(&cinode->lock_sem);
3832
3833         rc = generic_write_checks(iocb, from);
3834         if (rc <= 0)
3835                 goto out;
3836
3837         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3838                                      server->vals->exclusive_lock_type, 0,
3839                                      NULL, CIFS_WRITE_OP))
3840                 rc = __generic_file_write_iter(iocb, from);
3841         else
3842                 rc = -EACCES;
3843 out:
3844         up_read(&cinode->lock_sem);
3845         inode_unlock(inode);
3846
3847         if (rc > 0)
3848                 rc = generic_write_sync(iocb, rc);
3849         return rc;
3850 }
3851
3852 ssize_t
3853 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3854 {
3855         struct inode *inode = file_inode(iocb->ki_filp);
3856         struct cifsInodeInfo *cinode = CIFS_I(inode);
3857         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3858         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3859                                                 iocb->ki_filp->private_data;
3860         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3861         ssize_t written;
3862
3863         written = cifs_get_writer(cinode);
3864         if (written)
3865                 return written;
3866
3867         if (CIFS_CACHE_WRITE(cinode)) {
3868                 if (cap_unix(tcon->ses) &&
3869                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3870                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3871                         written = generic_file_write_iter(iocb, from);
3872                         goto out;
3873                 }
3874                 written = cifs_writev(iocb, from);
3875                 goto out;
3876         }
3877         /*
3878          * For non-oplocked files in strict cache mode we need to write the data
3879          * to the server exactly from pos to pos+len-1 rather than flush all
3880          * affected pages, because that may cause an error with mandatory locks
3881          * on these pages but not on the region from pos to pos+len-1.
3882          */
3883         written = cifs_user_writev(iocb, from);
3884         if (CIFS_CACHE_READ(cinode)) {
3885                 /*
3886                  * We have read level caching and we have just sent a write
3887                  * request to the server thus making data in the cache stale.
3888                  * Zap the cache and set oplock/lease level to NONE to avoid
3889                  * reading stale data from the cache. All subsequent read
3890                  * operations will read new data from the server.
3891                  */
3892                 cifs_zap_mapping(inode);
3893                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3894                          inode);
3895                 cinode->oplock = 0;
3896         }
3897 out:
3898         cifs_put_writer(cinode);
3899         return written;
3900 }
3901
3902 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3903 {
3904         struct cifs_readdata *rdata;
3905
3906         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3907         if (rdata) {
3908                 kref_init(&rdata->refcount);
3909                 INIT_LIST_HEAD(&rdata->list);
3910                 init_completion(&rdata->done);
3911                 INIT_WORK(&rdata->work, complete);
3912         }
3913
3914         return rdata;
3915 }
3916
3917 void
3918 cifs_readdata_release(struct kref *refcount)
3919 {
3920         struct cifs_readdata *rdata = container_of(refcount,
3921                                         struct cifs_readdata, refcount);
3922
3923         if (rdata->ctx)
3924                 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3925 #ifdef CONFIG_CIFS_SMB_DIRECT
3926         if (rdata->mr) {
3927                 smbd_deregister_mr(rdata->mr);
3928                 rdata->mr = NULL;
3929         }
3930 #endif
3931         if (rdata->cfile)
3932                 cifsFileInfo_put(rdata->cfile);
3933
3934         kfree(rdata);
3935 }
3936
3937 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3938
3939 static void
3940 cifs_uncached_readv_complete(struct work_struct *work)
3941 {
3942         struct cifs_readdata *rdata = container_of(work,
3943                                                 struct cifs_readdata, work);
3944
3945         complete(&rdata->done);
3946         collect_uncached_read_data(rdata->ctx);
3947         /* the call below can possibly free the last ref to the aio ctx */
3948         kref_put(&rdata->refcount, cifs_readdata_release);
3949 }
3950
3951 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3952                         struct list_head *rdata_list,
3953                         struct cifs_aio_ctx *ctx)
3954 {
3955         unsigned int rsize;
3956         struct cifs_credits credits;
3957         int rc;
3958         struct TCP_Server_Info *server;
3959
3960         /* XXX: should we pick a new channel here? */
3961         server = rdata->server;
3962
3963         do {
3964                 if (rdata->cfile->invalidHandle) {
3965                         rc = cifs_reopen_file(rdata->cfile, true);
3966                         if (rc == -EAGAIN)
3967                                 continue;
3968                         else if (rc)
3969                                 break;
3970                 }
3971
3972                 /*
3973                  * Wait for credits to resend this rdata.
3974                  * Note: we attempt to resend the whole rdata rather than
3975                  * in segments.
3976                  */
3977                 do {
3978                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3979                                                 &rsize, &credits);
3980
3981                         if (rc)
3982                                 goto fail;
3983
3984                         if (rsize < rdata->bytes) {
3985                                 add_credits_and_wake_if(server, &credits, 0);
3986                                 msleep(1000);
3987                         }
3988                 } while (rsize < rdata->bytes);
3989                 rdata->credits = credits;
3990
3991                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3992                 if (!rc) {
3993                         if (rdata->cfile->invalidHandle)
3994                                 rc = -EAGAIN;
3995                         else {
3996 #ifdef CONFIG_CIFS_SMB_DIRECT
3997                                 if (rdata->mr) {
3998                                         rdata->mr->need_invalidate = true;
3999                                         smbd_deregister_mr(rdata->mr);
4000                                         rdata->mr = NULL;
4001                                 }
4002 #endif
4003                                 rc = server->ops->async_readv(rdata);
4004                         }
4005                 }
4006
4007                 /* If the read was successfully sent, we are done */
4008                 if (!rc) {
4009                         /* Add to aio pending list */
4010                         list_add_tail(&rdata->list, rdata_list);
4011                         return 0;
4012                 }
4013
4014                 /* Roll back credits and retry if needed */
4015                 add_credits_and_wake_if(server, &rdata->credits, 0);
4016         } while (rc == -EAGAIN);
4017
4018 fail:
4019         kref_put(&rdata->refcount, cifs_readdata_release);
4020         return rc;
4021 }
4022
4023 static int
4024 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4025                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4026                      struct cifs_aio_ctx *ctx)
4027 {
4028         struct cifs_readdata *rdata;
4029         unsigned int rsize, nsegs, max_segs = INT_MAX;
4030         struct cifs_credits credits_on_stack;
4031         struct cifs_credits *credits = &credits_on_stack;
4032         size_t cur_len, max_len;
4033         int rc;
4034         pid_t pid;
4035         struct TCP_Server_Info *server;
4036
4037         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4038
4039 #ifdef CONFIG_CIFS_SMB_DIRECT
4040         if (server->smbd_conn)
4041                 max_segs = server->smbd_conn->max_frmr_depth;
4042 #endif
4043
4044         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4045                 pid = open_file->pid;
4046         else
4047                 pid = current->tgid;
4048
4049         do {
4050                 if (open_file->invalidHandle) {
4051                         rc = cifs_reopen_file(open_file, true);
4052                         if (rc == -EAGAIN)
4053                                 continue;
4054                         else if (rc)
4055                                 break;
4056                 }
4057
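                /* If no rsize is set yet (none was negotiated or given at
                 * mount time), ask the server ops to compute one now.
                 */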
4058                 if (cifs_sb->ctx->rsize == 0)
4059                         cifs_sb->ctx->rsize =
4060                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4061                                                              cifs_sb->ctx);
4062
4063                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4064                                                    &rsize, credits);
4065                 if (rc)
4066                         break;
4067
4068                 max_len = min_t(size_t, len, rsize);
4069
4070                 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4071                                                  max_segs, &nsegs);
4072                 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4073                          cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4074                 if (cur_len == 0) {
4075                         rc = -EIO;
4076                         add_credits_and_wake_if(server, credits, 0);
4077                         break;
4078                 }
4079
4080                 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4081                 if (!rdata) {
4082                         add_credits_and_wake_if(server, credits, 0);
4083                         rc = -ENOMEM;
4084                         break;
4085                 }
4086
4087                 rdata->server   = server;
4088                 rdata->cfile    = cifsFileInfo_get(open_file);
4089                 rdata->offset   = fpos;
4090                 rdata->bytes    = cur_len;
4091                 rdata->pid      = pid;
4092                 rdata->credits  = credits_on_stack;
4093                 rdata->ctx      = ctx;
4094                 kref_get(&ctx->refcount);
4095
4096                 rdata->iter     = ctx->iter;
4097                 iov_iter_truncate(&rdata->iter, cur_len);
4098
4099                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4100
4101                 if (!rc) {
4102                         if (rdata->cfile->invalidHandle)
4103                                 rc = -EAGAIN;
4104                         else
4105                                 rc = server->ops->async_readv(rdata);
4106                 }
4107
4108                 if (rc) {
4109                         add_credits_and_wake_if(server, &rdata->credits, 0);
4110                         kref_put(&rdata->refcount, cifs_readdata_release);
4111                         if (rc == -EAGAIN)
4112                                 continue;
4113                         break;
4114                 }
4115
4116                 list_add_tail(&rdata->list, rdata_list);
4117                 iov_iter_advance(&ctx->iter, cur_len);
4118                 fpos += cur_len;
4119                 len -= cur_len;
4120         } while (len > 0);
4121
4122         return rc;
4123 }
4124
4125 static void
4126 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4127 {
4128         struct cifs_readdata *rdata, *tmp;
4129         struct cifs_sb_info *cifs_sb;
4130         int rc;
4131
4132         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4133
4134         mutex_lock(&ctx->aio_mutex);
4135
4136         if (list_empty(&ctx->list)) {
4137                 mutex_unlock(&ctx->aio_mutex);
4138                 return;
4139         }
4140
4141         rc = ctx->rc;
4142         /* the loop below should proceed in the order of increasing offsets */
4143 again:
4144         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4145                 if (!rc) {
4146                         if (!try_wait_for_completion(&rdata->done)) {
4147                                 mutex_unlock(&ctx->aio_mutex);
4148                                 return;
4149                         }
4150
4151                         if (rdata->result == -EAGAIN) {
4152                                 /* resend call if it's a retryable error */
4153                                 struct list_head tmp_list;
4154                                 unsigned int got_bytes = rdata->got_bytes;
4155
4156                                 list_del_init(&rdata->list);
4157                                 INIT_LIST_HEAD(&tmp_list);
4158
4159                                 if (ctx->direct_io) {
4160                                         /*
4161                                          * Re-use rdata as this is a
4162                                          * direct I/O
4163                                          */
4164                                         rc = cifs_resend_rdata(
4165                                                 rdata,
4166                                                 &tmp_list, ctx);
4167                                 } else {
4168                                         rc = cifs_send_async_read(
4169                                                 rdata->offset + got_bytes,
4170                                                 rdata->bytes - got_bytes,
4171                                                 rdata->cfile, cifs_sb,
4172                                                 &tmp_list, ctx);
4173
4174                                         kref_put(&rdata->refcount,
4175                                                 cifs_readdata_release);
4176                                 }
4177
4178                                 list_splice(&tmp_list, &ctx->list);
4179
4180                                 goto again;
4181                         } else if (rdata->result)
4182                                 rc = rdata->result;
4183
4184                         /* if there was a short read -- discard anything left */
4185                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4186                                 rc = -ENODATA;
4187
4188                         ctx->total_len += rdata->got_bytes;
4189                 }
4190                 list_del_init(&rdata->list);
4191                 kref_put(&rdata->refcount, cifs_readdata_release);
4192         }
4193
4194         /* mask nodata case */
4195         if (rc == -ENODATA)
4196                 rc = 0;
4197
4198         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4199
4200         mutex_unlock(&ctx->aio_mutex);
4201
4202         if (ctx->iocb && ctx->iocb->ki_complete)
4203                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4204         else
4205                 complete(&ctx->done);
4206 }
4207
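/*
 * Common code for uncached and direct reads: carve the request up into
 * rsize-sized asynchronous reads and either wait for the result (for a
 * synchronous kiocb) or let collect_uncached_read_data() complete the
 * iocb later.
 */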
4208 static ssize_t __cifs_readv(
4209         struct kiocb *iocb, struct iov_iter *to, bool direct)
4210 {
4211         size_t len;
4212         struct file *file = iocb->ki_filp;
4213         struct cifs_sb_info *cifs_sb;
4214         struct cifsFileInfo *cfile;
4215         struct cifs_tcon *tcon;
4216         ssize_t rc, total_read = 0;
4217         loff_t offset = iocb->ki_pos;
4218         struct cifs_aio_ctx *ctx;
4219
4220         len = iov_iter_count(to);
4221         if (!len)
4222                 return 0;
4223
4224         cifs_sb = CIFS_FILE_SB(file);
4225         cfile = file->private_data;
4226         tcon = tlink_tcon(cfile->tlink);
4227
4228         if (!tcon->ses->server->ops->async_readv)
4229                 return -ENOSYS;
4230
4231         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4232                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4233
4234         ctx = cifs_aio_ctx_alloc();
4235         if (!ctx)
4236                 return -ENOMEM;
4237
4238         ctx->pos        = offset;
4239         ctx->direct_io  = direct;
4240         ctx->len        = len;
4241         ctx->cfile      = cifsFileInfo_get(cfile);
4242         ctx->nr_pinned_pages = 0;
4243
4244         if (!is_sync_kiocb(iocb))
4245                 ctx->iocb = iocb;
4246
4247         if (user_backed_iter(to)) {
4248                 /*
4249                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4250                  * they contain references to the calling process's virtual
4251                  * memory layout which won't be available in an async worker
4252                  * thread.  This also takes a pin on every folio involved.
4253                  */
4254                 rc = netfs_extract_user_iter(to, iov_iter_count(to),
4255                                              &ctx->iter, 0);
4256                 if (rc < 0) {
4257                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4258                         return rc;
4259                 }
4260
4261                 ctx->nr_pinned_pages = rc;
4262                 ctx->bv = (void *)ctx->iter.bvec;
4263                 ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4264                 ctx->should_dirty = true;
4265         } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4266                    !is_sync_kiocb(iocb)) {
4267                 /*
4268                  * If the op is asynchronous, we need to copy the list attached
4269                  * to a BVEC/KVEC-type iterator, but we assume that the storage
4270                  * will be retained by the caller; in any case, we may or may
4271                  * not be able to pin the pages, so we don't try.
4272                  */
4273                 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4274                 if (!ctx->bv) {
4275                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4276                         return -ENOMEM;
4277                 }
4278         } else {
4279                 /*
4280                  * Otherwise, we just pass the iterator down as-is and rely on
4281                  * the caller to make sure the pages referred to by the
4282                  * iterator don't evaporate.
4283                  */
4284                 ctx->iter = *to;
4285         }
4286
4287         if (direct) {
4288                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4289                                                   offset, offset + len - 1);
4290                 if (rc) {
4291                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4292                         return -EAGAIN;
4293                 }
4294         }
4295
4296         /* grab a lock here because read response handlers can access ctx */
4297         mutex_lock(&ctx->aio_mutex);
4298
4299         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4300
4301         /* if at least one read request was sent successfully, then reset rc */
4302         if (!list_empty(&ctx->list))
4303                 rc = 0;
4304
4305         mutex_unlock(&ctx->aio_mutex);
4306
4307         if (rc) {
4308                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4309                 return rc;
4310         }
4311
4312         if (!is_sync_kiocb(iocb)) {
4313                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4314                 return -EIOCBQUEUED;
4315         }
4316
4317         rc = wait_for_completion_killable(&ctx->done);
4318         if (rc) {
4319                 mutex_lock(&ctx->aio_mutex);
4320                 ctx->rc = rc = -EINTR;
4321                 total_read = ctx->total_len;
4322                 mutex_unlock(&ctx->aio_mutex);
4323         } else {
4324                 rc = ctx->rc;
4325                 total_read = ctx->total_len;
4326         }
4327
4328         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4329
4330         if (total_read) {
4331                 iocb->ki_pos += total_read;
4332                 return total_read;
4333         }
4334         return rc;
4335 }
4336
4337 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4338 {
4339         return __cifs_readv(iocb, to, true);
4340 }
4341
4342 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4343 {
4344         return __cifs_readv(iocb, to, false);
4345 }
4346
4347 ssize_t
4348 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4349 {
4350         struct inode *inode = file_inode(iocb->ki_filp);
4351         struct cifsInodeInfo *cinode = CIFS_I(inode);
4352         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4353         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4354                                                 iocb->ki_filp->private_data;
4355         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4356         int rc = -EACCES;
4357
4358         /*
4359          * In strict cache mode we need to read from the server every time
4360          * if we don't have a level II oplock, because the server can delay
4361          * the mtime change, so we can't decide whether to invalidate the
4362          * inode. Reading from the page cache can also fail if there are
4363          * mandatory locks on pages affected by this read but not on the
4364          * region from pos to pos+len-1.
4365          */
4366         if (!CIFS_CACHE_READ(cinode))
4367                 return cifs_user_readv(iocb, to);
4368
4369         if (cap_unix(tcon->ses) &&
4370             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4371             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4372                 return generic_file_read_iter(iocb, to);
4373
4374         /*
4375          * We need to hold the sem to be sure nobody modifies lock list
4376          * with a brlock that prevents reading.
4377          */
4378         down_read(&cinode->lock_sem);
4379         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4380                                      tcon->ses->server->vals->shared_lock_type,
4381                                      0, NULL, CIFS_READ_OP))
4382                 rc = generic_file_read_iter(iocb, to);
4383         up_read(&cinode->lock_sem);
4384         return rc;
4385 }
4386
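/*
 * Synchronously read up to read_size bytes into read_data, advancing
 * *offset as data arrives; each request to the server is capped at
 * rsize bytes.
 */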
4387 static ssize_t
4388 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4389 {
4390         int rc = -EACCES;
4391         unsigned int bytes_read = 0;
4392         unsigned int total_read;
4393         unsigned int current_read_size;
4394         unsigned int rsize;
4395         struct cifs_sb_info *cifs_sb;
4396         struct cifs_tcon *tcon;
4397         struct TCP_Server_Info *server;
4398         unsigned int xid;
4399         char *cur_offset;
4400         struct cifsFileInfo *open_file;
4401         struct cifs_io_parms io_parms = {0};
4402         int buf_type = CIFS_NO_BUFFER;
4403         __u32 pid;
4404
4405         xid = get_xid();
4406         cifs_sb = CIFS_FILE_SB(file);
4407
4408         /* FIXME: set up handlers for larger reads and/or convert to async */
4409         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4410
4411         if (file->private_data == NULL) {
4412                 rc = -EBADF;
4413                 free_xid(xid);
4414                 return rc;
4415         }
4416         open_file = file->private_data;
4417         tcon = tlink_tcon(open_file->tlink);
4418         server = cifs_pick_channel(tcon->ses);
4419
4420         if (!server->ops->sync_read) {
4421                 free_xid(xid);
4422                 return -ENOSYS;
4423         }
4424
4425         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4426                 pid = open_file->pid;
4427         else
4428                 pid = current->tgid;
4429
4430         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4431                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4432
4433         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4434              total_read += bytes_read, cur_offset += bytes_read) {
4435                 do {
4436                         current_read_size = min_t(uint, read_size - total_read,
4437                                                   rsize);
4438                         /*
4439                          * For Windows ME and 9x we do not want to request
4440                          * more than was negotiated, since the server will
4441                          * refuse the read otherwise.
4442                          */
4443                         if (!(tcon->ses->capabilities &
4444                                 tcon->ses->server->vals->cap_large_files)) {
4445                                 current_read_size = min_t(uint,
4446                                         current_read_size, CIFSMaxBufSize);
4447                         }
4448                         if (open_file->invalidHandle) {
4449                                 rc = cifs_reopen_file(open_file, true);
4450                                 if (rc != 0)
4451                                         break;
4452                         }
4453                         io_parms.pid = pid;
4454                         io_parms.tcon = tcon;
4455                         io_parms.offset = *offset;
4456                         io_parms.length = current_read_size;
4457                         io_parms.server = server;
4458                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4459                                                     &bytes_read, &cur_offset,
4460                                                     &buf_type);
4461                 } while (rc == -EAGAIN);
4462
4463                 if (rc || (bytes_read == 0)) {
4464                         if (total_read) {
4465                                 break;
4466                         } else {
4467                                 free_xid(xid);
4468                                 return rc;
4469                         }
4470                 } else {
4471                         cifs_stats_bytes_read(tcon, total_read);
4472                         *offset += bytes_read;
4473                 }
4474         }
4475         free_xid(xid);
4476         return total_read;
4477 }
4478
4479 /*
4480  * If the page is mmap'ed into a process' page tables, then we need to make
4481  * sure that it doesn't change while being written back.
4482  */
4483 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4484 {
4485         struct folio *folio = page_folio(vmf->page);
4486
4487         /* Wait for the folio to be written to the cache before we allow it to
4488          * be modified.  We then assume the entire folio will need writing back.
4489          */
4490 #ifdef CONFIG_CIFS_FSCACHE
4491         if (folio_test_fscache(folio) &&
4492             folio_wait_fscache_killable(folio) < 0)
4493                 return VM_FAULT_RETRY;
4494 #endif
4495
4496         folio_wait_writeback(folio);
4497
4498         if (folio_lock_killable(folio) < 0)
4499                 return VM_FAULT_RETRY;
4500         return VM_FAULT_LOCKED;
4501 }
4502
4503 static const struct vm_operations_struct cifs_file_vm_ops = {
4504         .fault = filemap_fault,
4505         .map_pages = filemap_map_pages,
4506         .page_mkwrite = cifs_page_mkwrite,
4507 };
4508
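/*
 * mmap for strict cache mode: if we do not hold a read oplock, zap the
 * local page cache first so the mapping is backed by up-to-date data.
 */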
4509 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4510 {
4511         int xid, rc = 0;
4512         struct inode *inode = file_inode(file);
4513
4514         xid = get_xid();
4515
4516         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4517                 rc = cifs_zap_mapping(inode);
4518         if (!rc)
4519                 rc = generic_file_mmap(file, vma);
4520         if (!rc)
4521                 vma->vm_ops = &cifs_file_vm_ops;
4522
4523         free_xid(xid);
4524         return rc;
4525 }
4526
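/* Revalidate the file before handing it to generic_file_mmap(). */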
4527 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4528 {
4529         int rc, xid;
4530
4531         xid = get_xid();
4532
4533         rc = cifs_revalidate_file(file);
4534         if (rc)
4535                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4536                          rc);
4537         if (!rc)
4538                 rc = generic_file_mmap(file, vma);
4539         if (!rc)
4540                 vma->vm_ops = &cifs_file_vm_ops;
4541
4542         free_xid(xid);
4543         return rc;
4544 }
4545
4546 /*
4547  * Unlock a bunch of folios in the pagecache.
4548  */
4549 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4550 {
4551         struct folio *folio;
4552         XA_STATE(xas, &mapping->i_pages, first);
4553
4554         rcu_read_lock();
4555         xas_for_each(&xas, folio, last) {
4556                 folio_unlock(folio);
4557         }
4558         rcu_read_unlock();
4559 }
4560
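/*
 * Completion work for a readahead request: copy the data to the local
 * cache if the read succeeded, zero any shortfall in the buffer, then
 * mark the folios uptodate and unlock them.
 */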
4561 static void cifs_readahead_complete(struct work_struct *work)
4562 {
4563         struct cifs_readdata *rdata = container_of(work,
4564                                                    struct cifs_readdata, work);
4565         struct folio *folio;
4566         pgoff_t last;
4567         bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4568
4569         XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4570
4571         if (good)
4572                 cifs_readahead_to_fscache(rdata->mapping->host,
4573                                           rdata->offset, rdata->bytes);
4574
4575         if (iov_iter_count(&rdata->iter) > 0)
4576                 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4577
4578         last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4579
4580         rcu_read_lock();
4581         xas_for_each(&xas, folio, last) {
4582                 if (good) {
4583                         flush_dcache_folio(folio);
4584                         folio_mark_uptodate(folio);
4585                 }
4586                 folio_unlock(folio);
4587         }
4588         rcu_read_unlock();
4589
4590         kref_put(&rdata->refcount, cifs_readdata_release);
4591 }
4592
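/*
 * Issue asynchronous reads for a readahead request, serving folios from
 * the local cache where possible instead of going to the server.
 */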
4593 static void cifs_readahead(struct readahead_control *ractl)
4594 {
4595         struct cifsFileInfo *open_file = ractl->file->private_data;
4596         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4597         struct TCP_Server_Info *server;
4598         unsigned int xid, nr_pages, cache_nr_pages = 0;
4599         unsigned int ra_pages;
4600         pgoff_t next_cached = ULONG_MAX, ra_index;
4601         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4602                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4603         bool check_cache = caching;
4604         pid_t pid;
4605         int rc = 0;
4606
4607         /* Note that readahead_count() lags behind our dequeuing of pages from
4608          * the ractl, so we have to keep track for ourselves.
4609          */
4610         ra_pages = readahead_count(ractl);
4611         ra_index = readahead_index(ractl);
4612
4613         xid = get_xid();
4614
4615         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4616                 pid = open_file->pid;
4617         else
4618                 pid = current->tgid;
4619
4620         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4621
4622         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4623                  __func__, ractl->file, ractl->mapping, ra_pages);
4624
4625         /*
4626          * Chop the readahead request up into rsize-sized read requests.
4627          */
4628         while ((nr_pages = ra_pages)) {
4629                 unsigned int i, rsize;
4630                 struct cifs_readdata *rdata;
4631                 struct cifs_credits credits_on_stack;
4632                 struct cifs_credits *credits = &credits_on_stack;
4633                 struct folio *folio;
4634                 pgoff_t fsize;
4635
4636                 /*
4637                  * Find out if we have anything cached in the range of
4638                  * interest, and if so, where the next chunk of cached data is.
4639                  */
4640                 if (caching) {
4641                         if (check_cache) {
4642                                 rc = cifs_fscache_query_occupancy(
4643                                         ractl->mapping->host, ra_index, nr_pages,
4644                                         &next_cached, &cache_nr_pages);
4645                                 if (rc < 0)
4646                                         caching = false;
4647                                 check_cache = false;
4648                         }
4649
4650                         if (ra_index == next_cached) {
4651                                 /*
4652                                  * TODO: Send a whole batch of pages to be read
4653                                  * by the cache.
4654                                  */
4655                                 folio = readahead_folio(ractl);
4656                                 fsize = folio_nr_pages(folio);
4657                                 ra_pages -= fsize;
4658                                 ra_index += fsize;
4659                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4660                                                                &folio->page) < 0) {
4661                                         /*
4662                                          * TODO: Deal with cache read failure
4663                                          * here, but for the moment, delegate
4664                                          * that to readpage.
4665                                          */
4666                                         caching = false;
4667                                 }
4668                                 folio_unlock(folio);
4669                                 next_cached += fsize;
4670                                 cache_nr_pages -= fsize;
4671                                 if (cache_nr_pages == 0)
4672                                         check_cache = true;
4673                                 continue;
4674                         }
4675                 }
4676
4677                 if (open_file->invalidHandle) {
4678                         rc = cifs_reopen_file(open_file, true);
4679                         if (rc) {
4680                                 if (rc == -EAGAIN)
4681                                         continue;
4682                                 break;
4683                         }
4684                 }
4685
4686                 if (cifs_sb->ctx->rsize == 0)
4687                         cifs_sb->ctx->rsize =
4688                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4689                                                              cifs_sb->ctx);
4690
4691                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4692                                                    &rsize, credits);
4693                 if (rc)
4694                         break;
4695                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4696                 if (next_cached != ULONG_MAX)
4697                         nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4698
4699                 /*
4700                  * Give up immediately if rsize is too small to read an entire
4701                  * page. The VFS will fall back to readpage. We should
4702                  * never reach this point, however, since we set ra_pages
4703                  * to 0 when the rsize is smaller than a cache page.
4704                  */
4705                 if (unlikely(!nr_pages)) {
4706                         add_credits_and_wake_if(server, credits, 0);
4707                         break;
4708                 }
4709
4710                 rdata = cifs_readdata_alloc(cifs_readahead_complete);
4711                 if (!rdata) {
4712                         /* best to give up if we're out of mem */
4713                         add_credits_and_wake_if(server, credits, 0);
4714                         break;
4715                 }
4716
4717                 rdata->offset   = ra_index * PAGE_SIZE;
4718                 rdata->bytes    = nr_pages * PAGE_SIZE;
4719                 rdata->cfile    = cifsFileInfo_get(open_file);
4720                 rdata->server   = server;
4721                 rdata->mapping  = ractl->mapping;
4722                 rdata->pid      = pid;
4723                 rdata->credits  = credits_on_stack;
4724
4725                 for (i = 0; i < nr_pages; i++) {
4726                         if (!readahead_folio(ractl))
4727                                 WARN_ON(1);
4728                 }
4729                 ra_pages -= nr_pages;
4730                 ra_index += nr_pages;
4731
4732                 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4733                                 rdata->offset, rdata->bytes);
4734
4735                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4736                 if (!rc) {
4737                         if (rdata->cfile->invalidHandle)
4738                                 rc = -EAGAIN;
4739                         else
4740                                 rc = server->ops->async_readv(rdata);
4741                 }
4742
4743                 if (rc) {
4744                         add_credits_and_wake_if(server, &rdata->credits, 0);
4745                         cifs_unlock_folios(rdata->mapping,
4746                                            rdata->offset / PAGE_SIZE,
4747                                            (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4748                         /* Fall back to readpage in error/reconnect cases */
4749                         kref_put(&rdata->refcount, cifs_readdata_release);
4750                         break;
4751                 }
4752
4753                 kref_put(&rdata->refcount, cifs_readdata_release);
4754         }
4755
4756         free_xid(xid);
4757 }
4758
4759 /*
4760  * cifs_readpage_worker must be called with the page pinned
4761  */
4762 static int cifs_readpage_worker(struct file *file, struct page *page,
4763         loff_t *poffset)
4764 {
4765         struct inode *inode = file_inode(file);
4766         struct timespec64 atime, mtime;
4767         char *read_data;
4768         int rc;
4769
4770         /* Is the page cached? */
4771         rc = cifs_readpage_from_fscache(inode, page);
4772         if (rc == 0)
4773                 goto read_complete;
4774
4775         read_data = kmap(page);
4776         /* for reads over a certain size we could initiate async read-ahead */
4777
4778         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4779
4780         if (rc < 0)
4781                 goto io_error;
4782         else
4783                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4784
4785         /* we do not want atime to be less than mtime, as it broke some apps */
4786         atime = inode_set_atime_to_ts(inode, current_time(inode));
4787         mtime = inode_get_mtime(inode);
4788         if (timespec64_compare(&atime, &mtime) < 0)
4789                 inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4790
4791         if (PAGE_SIZE > rc)
4792                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4793
4794         flush_dcache_page(page);
4795         SetPageUptodate(page);
4796         rc = 0;
4797
4798 io_error:
4799         kunmap(page);
4800
4801 read_complete:
4802         unlock_page(page);
4803         return rc;
4804 }
4805
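/* ->read_folio handler: fill a single folio, trying the cache first. */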
4806 static int cifs_read_folio(struct file *file, struct folio *folio)
4807 {
4808         struct page *page = &folio->page;
4809         loff_t offset = page_file_offset(page);
4810         int rc = -EACCES;
4811         unsigned int xid;
4812
4813         xid = get_xid();
4814
4815         if (file->private_data == NULL) {
4816                 rc = -EBADF;
4817                 free_xid(xid);
4818                 return rc;
4819         }
4820
4821         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4822                  page, (int)offset, (int)offset);
4823
4824         rc = cifs_readpage_worker(file, page, &offset);
4825
4826         free_xid(xid);
4827         return rc;
4828 }
4829
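/* Return 1 if the inode has at least one file handle open for writing. */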
4830 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4831 {
4832         struct cifsFileInfo *open_file;
4833
4834         spin_lock(&cifs_inode->open_file_lock);
4835         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4836                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4837                         spin_unlock(&cifs_inode->open_file_lock);
4838                         return 1;
4839                 }
4840         }
4841         spin_unlock(&cifs_inode->open_file_lock);
4842         return 0;
4843 }
4844
4845 /* We do not want to update the file size from the server for inodes
4846    open for write, to avoid races with writepage extending the file.
4847    In the future we could consider allowing refreshing the inode only
4848    on increases in the file size, but this is tricky to do without
4849    racing with writebehind page caching in the current Linux kernel
4850    design */
4851 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4852                             bool from_readdir)
4853 {
4854         if (!cifsInode)
4855                 return true;
4856
4857         if (is_inode_writable(cifsInode) ||
4858                 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4859                 /* This inode is open for write at least once */
4860                 struct cifs_sb_info *cifs_sb;
4861
4862                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4863                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4864                         /* since there is no page cache to corrupt on
4865                            direct I/O, we can change the size safely */
4866                         return true;
4867                 }
4868
4869                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4870                         return true;
4871
4872                 return false;
4873         } else
4874                 return true;
4875 }
4876
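/*
 * ->write_begin handler: grab and prepare the target page, reading it
 * in first when the write is partial and the existing data is needed.
 */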
4877 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4878                         loff_t pos, unsigned len,
4879                         struct page **pagep, void **fsdata)
4880 {
4881         int oncethru = 0;
4882         pgoff_t index = pos >> PAGE_SHIFT;
4883         loff_t offset = pos & (PAGE_SIZE - 1);
4884         loff_t page_start = pos & PAGE_MASK;
4885         loff_t i_size;
4886         struct page *page;
4887         int rc = 0;
4888
4889         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4890
4891 start:
4892         page = grab_cache_page_write_begin(mapping, index);
4893         if (!page) {
4894                 rc = -ENOMEM;
4895                 goto out;
4896         }
4897
4898         if (PageUptodate(page))
4899                 goto out;
4900
4901         /*
4902          * If we write a full page it will be up to date, no need to read from
4903          * the server. If the write is short, we'll end up doing a sync write
4904          * instead.
4905          */
4906         if (len == PAGE_SIZE)
4907                 goto out;
4908
4909         /*
4910          * optimize away the read when we have an oplock, and we're not
4911          * expecting to use any of the data we'd be reading in. That
4912          * is, when the page lies beyond the EOF, or straddles the EOF
4913          * and the write will cover all of the existing data.
4914          */
4915         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4916                 i_size = i_size_read(mapping->host);
4917                 if (page_start >= i_size ||
4918                     (offset == 0 && (pos + len) >= i_size)) {
4919                         zero_user_segments(page, 0, offset,
4920                                            offset + len,
4921                                            PAGE_SIZE);
4922                         /*
4923                          * PageChecked means that the parts of the page
4924                          * to which we're not writing are considered up
4925                          * to date. Once the data is copied to the
4926                          * page, it can be set uptodate.
4927                          */
4928                         SetPageChecked(page);
4929                         goto out;
4930                 }
4931         }
4932
4933         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4934                 /*
4935                  * might as well read a page, it is fast enough. If we get
4936                  * an error, we don't need to return it. cifs_write_end will
4937                  * do a sync write instead since PG_uptodate isn't set.
4938                  */
4939                 cifs_readpage_worker(file, page, &page_start);
4940                 put_page(page);
4941                 oncethru = 1;
4942                 goto start;
4943         } else {
4944                 /* we could try using another file handle if there is one -
4945                    but how would we lock it to prevent a close of that
4946                    handle racing with this read? In any case, this will be
4947                    written out by write_end, so it is fine */
4948         }
4949 out:
4950         *pagep = page;
4951         return rc;
4952 }
4953
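/*
 * Decide whether a folio can be released for reclaim: refuse while it
 * carries private data; if it is being written to the local cache, wait
 * for that to finish, or refuse if waiting is not allowed here.
 */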
4954 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4955 {
4956         if (folio_test_private(folio))
4957                 return false;
4958         if (folio_test_fscache(folio)) {
4959                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4960                         return false;
4961                 folio_wait_fscache(folio);
4962         }
4963         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4964         return true;
4965 }
4966
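/* Wait for any write to the local cache to finish before invalidating. */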
4967 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4968                                  size_t length)
4969 {
4970         folio_wait_fscache(folio);
4971 }
4972
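/* Write a dirty folio back to the server before it is discarded. */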
4973 static int cifs_launder_folio(struct folio *folio)
4974 {
4975         int rc = 0;
4976         loff_t range_start = folio_pos(folio);
4977         loff_t range_end = range_start + folio_size(folio);
4978         struct writeback_control wbc = {
4979                 .sync_mode = WB_SYNC_ALL,
4980                 .nr_to_write = 0,
4981                 .range_start = range_start,
4982                 .range_end = range_end,
4983         };
4984
4985         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4986
4987         if (folio_clear_dirty_for_io(folio))
4988                 rc = cifs_writepage_locked(&folio->page, &wbc);
4989
4990         folio_wait_fscache(folio);
4991         return rc;
4992 }
4993
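/*
 * Work item run when the server breaks our oplock: downgrade the cached
 * state, flush and possibly invalidate the page cache, push any cached
 * byte-range locks to the server, then acknowledge the break.
 */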
4994 void cifs_oplock_break(struct work_struct *work)
4995 {
4996         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4997                                                   oplock_break);
4998         struct inode *inode = d_inode(cfile->dentry);
4999         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5000         struct cifsInodeInfo *cinode = CIFS_I(inode);
5001         struct cifs_tcon *tcon;
5002         struct TCP_Server_Info *server;
5003         struct tcon_link *tlink;
5004         int rc = 0;
5005         bool purge_cache = false, oplock_break_cancelled;
5006         __u64 persistent_fid, volatile_fid;
5007         __u16 net_fid;
5008
5009         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5010                         TASK_UNINTERRUPTIBLE);
5011
5012         tlink = cifs_sb_tlink(cifs_sb);
5013         if (IS_ERR(tlink))
5014                 goto out;
5015         tcon = tlink_tcon(tlink);
5016         server = tcon->ses->server;
5017
5018         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5019                                       cfile->oplock_epoch, &purge_cache);
5020
5021         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5022                                                 cifs_has_mand_locks(cinode)) {
5023                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5024                          inode);
5025                 cinode->oplock = 0;
5026         }
5027
5028         if (inode && S_ISREG(inode->i_mode)) {
5029                 if (CIFS_CACHE_READ(cinode))
5030                         break_lease(inode, O_RDONLY);
5031                 else
5032                         break_lease(inode, O_WRONLY);
5033                 rc = filemap_fdatawrite(inode->i_mapping);
5034                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5035                         rc = filemap_fdatawait(inode->i_mapping);
5036                         mapping_set_error(inode->i_mapping, rc);
5037                         cifs_zap_mapping(inode);
5038                 }
5039                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5040                 if (CIFS_CACHE_WRITE(cinode))
5041                         goto oplock_break_ack;
5042         }
5043
5044         rc = cifs_push_locks(cfile);
5045         if (rc)
5046                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5047
5048 oplock_break_ack:
5049         /*
5050          * When an oplock break is received and there are no active file
5051          * handles, only cached ones, schedule the deferred close
5052          * immediately so that a new open will not use the cached handle.
5053          */
5054
5055         if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5056                 cifs_close_deferred_file(cinode);
5057
5058         persistent_fid = cfile->fid.persistent_fid;
5059         volatile_fid = cfile->fid.volatile_fid;
5060         net_fid = cfile->fid.netfid;
5061         oplock_break_cancelled = cfile->oplock_break_cancelled;
5062
5063         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
5064         /*
5065          * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5066          * an acknowledgment to be sent when the file has already been closed.
5067          */
5068         spin_lock(&cinode->open_file_lock);
5069         /* check the list is empty since we can race with kill_sb calling tree disconnect */
5070         if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5071                 spin_unlock(&cinode->open_file_lock);
5072                 rc = server->ops->oplock_response(tcon, persistent_fid,
5073                                                   volatile_fid, net_fid, cinode);
5074                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5075         } else
5076                 spin_unlock(&cinode->open_file_lock);
5077
5078         cifs_put_tlink(tlink);
5079 out:
5080         cifs_done_oplock_break(cinode);
5081 }
5082
5083 /*
5084  * The presence of cifs_direct_io() in the address space ops vector
5085  * allows open() O_DIRECT flags which would have failed otherwise.
5086  *
5087  * In the non-cached mode (mount with cache=none), we shunt off direct
5088  * read and write requests so this method should never be called.
5089  *
5090  * Direct IO is not yet supported in the cached mode.
5091  */
5092 static ssize_t
5093 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5094 {
5095         /*
5096          * FIXME
5097          * Eventually need to support direct IO for non forcedirectio mounts
5098          */
5099         return -EINVAL;
5100 }
5101
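/* Allow a file on an SMB mount to be used as swap space (experimental). */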
5102 static int cifs_swap_activate(struct swap_info_struct *sis,
5103                               struct file *swap_file, sector_t *span)
5104 {
5105         struct cifsFileInfo *cfile = swap_file->private_data;
5106         struct inode *inode = swap_file->f_mapping->host;
5107         unsigned long blocks;
5108         long long isize;
5109
5110         cifs_dbg(FYI, "swap activate\n");
5111
5112         if (!swap_file->f_mapping->a_ops->swap_rw)
5113                 /* Cannot support swap */
5114                 return -EINVAL;
5115
5116         spin_lock(&inode->i_lock);
5117         blocks = inode->i_blocks;
5118         isize = inode->i_size;
5119         spin_unlock(&inode->i_lock);
5120         if (blocks * 512 < isize) {
5121                 pr_warn("swap activate: swapfile has holes\n");
5122                 return -EINVAL;
5123         }
5124         *span = sis->pages;
5125
5126         pr_warn_once("Swap support over SMB3 is experimental\n");
5127
5128         /*
5129          * TODO: consider adding ACL (or documenting how) to prevent other
5130          * users (on this or other systems) from reading it
5131          */
5132
5133
5134         /* TODO: add sk_set_memalloc(inet) or similar */
5135
5136         if (cfile)
5137                 cfile->swapfile = true;
5138         /*
5139          * TODO: Since file already open, we can't open with DENY_ALL here
5140          * but we could add call to grab a byte range lock to prevent others
5141          * from reading or writing the file
5142          */
5143
5144         sis->flags |= SWP_FS_OPS;
5145         return add_swap_extent(sis, 0, sis->max, 0);
5146 }
5147
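/* Undo cifs_swap_activate() when swapping on the file is disabled. */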
5148 static void cifs_swap_deactivate(struct file *file)
5149 {
5150         struct cifsFileInfo *cfile = file->private_data;
5151
5152         cifs_dbg(FYI, "swap deactivate\n");
5153
5154         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5155
5156         if (cfile)
5157                 cfile->swapfile = false;
5158
5159         /* do we need to unpin (or unlock) the file? */
5160 }
5161
5162 const struct address_space_operations cifs_addr_ops = {
5163         .read_folio = cifs_read_folio,
5164         .readahead = cifs_readahead,
5165         .writepages = cifs_writepages,
5166         .write_begin = cifs_write_begin,
5167         .write_end = cifs_write_end,
5168         .dirty_folio = netfs_dirty_folio,
5169         .release_folio = cifs_release_folio,
5170         .direct_IO = cifs_direct_io,
5171         .invalidate_folio = cifs_invalidate_folio,
5172         .launder_folio = cifs_launder_folio,
5173         .migrate_folio = filemap_migrate_folio,
5174         /*
5175          * TODO: investigate whether adding an is_dirty_writeback helper
5176          * would be useful here
5177          */
5178         .swap_activate = cifs_swap_activate,
5179         .swap_deactivate = cifs_swap_deactivate,
5180 };
5181
5182 /*
5183  * cifs_readahead requires the server to support a buffer large enough to
5184  * contain the header plus one complete page of data.  Otherwise, we need
5185  * to leave cifs_readahead out of the address space operations.
5186  */
5187 const struct address_space_operations cifs_addr_ops_smallbuf = {
5188         .read_folio = cifs_read_folio,
5189         .writepages = cifs_writepages,
5190         .write_begin = cifs_write_begin,
5191         .write_end = cifs_write_end,
5192         .dirty_folio = netfs_dirty_folio,
5193         .release_folio = cifs_release_folio,
5194         .invalidate_folio = cifs_invalidate_folio,
5195         .launder_folio = cifs_launder_folio,
5196         .migrate_folio = filemap_migrate_folio,
5197 };