// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Mark all open files on the tree connection as invalid, since they
 * were closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file = NULL;
        struct list_head *tmp;
        struct list_head *tmp1;

        /* only send once per connect */
        spin_lock(&tcon->ses->ses_lock);
        if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
                spin_unlock(&tcon->ses->ses_lock);
                return;
        }
        tcon->status = TID_IN_FILES_INVALIDATE;
        spin_unlock(&tcon->ses->ses_lock);

        /* list all files open on tree connection and mark them invalid */
        spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
        spin_unlock(&tcon->open_file_lock);

        invalidate_all_cached_dirs(tcon);
        spin_lock(&tcon->tc_lock);
        if (tcon->status == TID_IN_FILES_INVALIDATE)
                tcon->status = TID_NEED_TCON;
        spin_unlock(&tcon->tc_lock);

        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
         */
}

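/*
 * Map POSIX open flags to the SMB desired access bits requested on open.
 * When @rdwr_for_fscache is 1, a write-only open is widened to read/write
 * so that the local cache can fill in around partial writes.
 */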
static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access-denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

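/*
 * Map POSIX O_CREAT/O_EXCL/O_TRUNC combinations to the SMB create
 * disposition used on open (see the mapping table in cifs_nt_open()).
 */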
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
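/*
 * Open a file using the legacy SMB1 POSIX extensions. On success the new
 * netfid and oplock level are returned via @pnetfid and @poplock; if
 * @pinode is non-NULL the inode is looked up or revalidated from the
 * FILE_UNIX_BASIC_INFO the server returned.
 */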
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

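/*
 * Open a file over the regular (non-POSIX) protocol path: convert the
 * POSIX open flags to desired access and create disposition, issue the
 * open via server->ops->open(), then refresh the inode metadata. On
 * failure the new handle is closed and -ESTALE is mapped to -EOPENSTALE.
 */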
static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
                        struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;
        int rdwr_for_fscache = 0;

        if (!server->ops->open)
                return -ENOSYS;

        /* If we're caching, we need to be able to fill in around partial writes. */
        if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
                rdwr_for_fscache = 1;

        desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

retry_open:
        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = fid,
        };

        rc = server->ops->open(xid, &oparms, oplock, buf);
        if (rc) {
                if (rc == -EACCES && rdwr_for_fscache == 1) {
                        desired_access = cifs_convert_flags(f_flags, 0);
                        rdwr_for_fscache = 2;
                        goto retry_open;
                }
                return rc;
        }
        if (rdwr_for_fscache == 2)
                cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

        return rc;
}

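/*
 * Return true if any open instance of this inode currently holds
 * byte-range locks (mandatory brlocks), checked under lock_sem.
 */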
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

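/*
 * Acquire @sem for writing by polling down_write_trylock(), sleeping
 * 10ms between attempts, instead of blocking in down_write().
 */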
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);
void serverclose_work(struct work_struct *work);

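/*
 * Allocate and initialize a cifsFileInfo for a freshly opened handle:
 * take references on the dentry, tlink and superblock, resolve the
 * final oplock level against any pending open, and link the new file
 * into the per-tcon and per-inode open file lists.
 */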
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                                       struct tcon_link *tlink, __u32 oplock,
                                       const char *symlink_target)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        if (symlink_target) {
                cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
                if (!cfile->symlink_target) {
                        kfree(fdlocks);
                        kfree(cfile);
                        return NULL;
                }
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_WORK(&cfile->serverclose, serverclose_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

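/*
 * Take an additional reference on an open file's private data.
 */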
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

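/*
 * Final teardown once the last reference is gone: drop any remaining
 * lock records, then release the tlink, dentry and superblock
 * references taken in cifs_new_fileinfo() and free the structure.
 */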
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file->symlink_target);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

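/*
 * Work item used when a server close fails with -EBUSY or -EAGAIN:
 * retry the close a few times with a short delay, then hand off the
 * final put (offloaded or not, as requested by the original caller).
 */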
void serverclose_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, serverclose);

        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);

        struct TCP_Server_Info *server = tcon->ses->server;
        int rc = 0;
        int retries = 0;
        int MAX_RETRIES = 4;

        do {
                if (server->ops->close_getattr)
                        rc = server->ops->close_getattr(0, tcon, cifs_file);
                else if (server->ops->close)
                        rc = server->ops->close(0, tcon, &cifs_file->fid);

                if (rc == -EBUSY || rc == -EAGAIN) {
                        retries++;
                        msleep(250);
                }
        } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES));

        if (retries == MAX_RETRIES)
                pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);

        if (cifs_file->offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, offload the final release to a work queue
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        bool oplock_break_cancelled;
        bool serverclose_offloaded = false;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);

        cifs_file->offload = offload;
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;
                int rc = 0;

                xid = get_xid();
                if (server->ops->close_getattr)
                        rc = server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        rc = server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);

                if (rc == -EBUSY || rc == -EAGAIN) {
                        // Server close failed, hence offloading it as an async op
                        queue_work(serverclose_wq, &cifs_file->serverclose);
                        serverclose_offloaded = true;
                }
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        // If serverclose has been offloaded to the wq (on failure), it will
        // handle offloading the put as well. If serverclose was not
        // offloaded, we need to handle offloading the put here.
        if (!serverclose_offloaded) {
                if (offload)
                        queue_work(fileinfo_put_wq, &cifs_file->put);
                else
                        cifsFileInfo_put_final(cifs_file);
        }
}

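/*
 * VFS ->open() entry point. Reuses a cached deferred-close handle when
 * the open flags match; otherwise tries a POSIX open if the server
 * supports it, falling back to cifs_nt_open(), and finally wires the
 * result up as a new cifsFileInfo and fscache cookie.
 */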
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid = {};
        struct cifs_pending_open open;
        struct cifs_open_info_data data = {};

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
                                  xid, &data);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (!(file->f_flags & O_DIRECT))
                goto out;
        if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
                goto out;
        cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        cifs_free_open_info(&data);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

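/*
 * Reopen a file whose handle was invalidated by a reconnect. Retries
 * the POSIX open first where supported, otherwise reopens with
 * FILE_OPEN disposition; on success the inode is optionally flushed
 * and revalidated (@can_flush) and byte-range locks are reacquired.
 */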
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;
        int rdwr_for_fscache = 0;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called; if the server was down that means we
         * end up here, and we can never tell if the caller already has
         * the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

        /* If we're caching, we need to be able to fill in around partial writes. */
        if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
                rdwr_for_fscache = 1;

        desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

retry_open:
        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
                .cifs_sb = cifs_sb,
                .desired_access = desired_access,
                .create_options = cifs_create_options(cifs_sb, create_options),
                .disposition = disposition,
                .path = full_path,
                .fid = &cfile->fid,
                .reconnect = true,
        };

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && !oparms.reconnect) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }
        if (rc == -EACCES && rdwr_for_fscache == 1) {
                desired_access = cifs_convert_flags(cfile->f_flags, 0);
                rdwr_for_fscache = 2;
                goto retry_open;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

        if (rdwr_for_fscache == 2)
                cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

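/*
 * Delayed work that performs the real close of a deferred-close handle
 * once the close timeout has expired.
 */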
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

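/*
 * VFS ->release() entry point. If the inode holds a granted RHW lease,
 * defer the actual close for closetimeo so the handle can be reused by
 * a subsequent open; otherwise drop the reference immediately.
 */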
int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
                    && cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so take an extra reference
                                 * to avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->closetimeo);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file, *tmp;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &tcon->openFileList, tlist) {
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

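/*
 * VFS ->release() entry point for directories: close the search handle
 * on the server if the readdir was left uncompleted, and free the
 * buffered search results.
 */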
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

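/*
 * Allocate and initialize a byte-range lock record for the current
 * thread group.
 */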
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: one of CIFS_LOCK_OP (no op), CIFS_READ_OP or CIFS_WRITE_OP */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

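/*
 * Push all cached byte-range locks for this file to the server,
 * batching as many LOCKING_ANDX_RANGE entries per request as the
 * server's maxBuf allows, with one pass per lock type.
 */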
1398 int
1399 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1400 {
1401         unsigned int xid;
1402         int rc = 0, stored_rc;
1403         struct cifsLockInfo *li, *tmp;
1404         struct cifs_tcon *tcon;
1405         unsigned int num, max_num, max_buf;
1406         LOCKING_ANDX_RANGE *buf, *cur;
1407         static const int types[] = {
1408                 LOCKING_ANDX_LARGE_FILES,
1409                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1410         };
1411         int i;
1412
1413         xid = get_xid();
1414         tcon = tlink_tcon(cfile->tlink);
1415
1416         /*
1417          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1418          * and check it before using.
1419          */
1420         max_buf = tcon->ses->server->maxBuf;
1421         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1422                 free_xid(xid);
1423                 return -EINVAL;
1424         }
1425
1426         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1427                      PAGE_SIZE);
1428         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1429                         PAGE_SIZE);
1430         max_num = (max_buf - sizeof(struct smb_hdr)) /
1431                                                 sizeof(LOCKING_ANDX_RANGE);
1432         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1433         if (!buf) {
1434                 free_xid(xid);
1435                 return -ENOMEM;
1436         }
1437
1438         for (i = 0; i < 2; i++) {
1439                 cur = buf;
1440                 num = 0;
1441                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1442                         if (li->type != types[i])
1443                                 continue;
1444                         cur->Pid = cpu_to_le16(li->pid);
1445                         cur->LengthLow = cpu_to_le32((u32)li->length);
1446                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1447                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1448                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1449                         if (++num == max_num) {
1450                                 stored_rc = cifs_lockv(xid, tcon,
1451                                                        cfile->fid.netfid,
1452                                                        (__u8)li->type, 0, num,
1453                                                        buf);
1454                                 if (stored_rc)
1455                                         rc = stored_rc;
1456                                 cur = buf;
1457                                 num = 0;
1458                         } else
1459                                 cur++;
1460                 }
1461
1462                 if (num) {
1463                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1464                                                (__u8)types[i], 0, num, buf);
1465                         if (stored_rc)
1466                                 rc = stored_rc;
1467                 }
1468         }
1469
1470         kfree(buf);
1471         free_xid(xid);
1472         return rc;
1473 }
1474
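/*
 * Reduce an fl_owner_t to a 32-bit value usable as the lock "pid" on the
 * wire; XORing with the random cifs_lock_secret avoids handing a raw
 * kernel pointer to the server.
 */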
1475 static __u32
1476 hash_lockowner(fl_owner_t owner)
1477 {
1478         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1479 }
1480 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1481
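/*
 * A POSIX byte-range lock queued for transmission to the server by
 * cifs_push_posix_locks().
 */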
1482 struct lock_to_push {
1483         struct list_head llist;
1484         __u64 offset;
1485         __u64 length;
1486         __u32 pid;
1487         __u16 netfid;
1488         __u8 type;
1489 };
1490
1491 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
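/*
 * Push all cached POSIX byte-range locks on the inode to the server.  The
 * lock_to_push structures are preallocated (flc_lock is a spinlock, so we
 * cannot allocate while holding it), filled in under flc_lock, and then
 * sent one CIFSSMBPosixLock call at a time.
 */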
1492 static int
1493 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1494 {
1495         struct inode *inode = d_inode(cfile->dentry);
1496         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1497         struct file_lock *flock;
1498         struct file_lock_context *flctx = inode->i_flctx;
1499         unsigned int count = 0, i;
1500         int rc = 0, xid, type;
1501         struct list_head locks_to_send, *el;
1502         struct lock_to_push *lck, *tmp;
1503         __u64 length;
1504
1505         xid = get_xid();
1506
1507         if (!flctx)
1508                 goto out;
1509
1510         spin_lock(&flctx->flc_lock);
1511         list_for_each(el, &flctx->flc_posix) {
1512                 count++;
1513         }
1514         spin_unlock(&flctx->flc_lock);
1515
1516         INIT_LIST_HEAD(&locks_to_send);
1517
1518         /*
1519          * Allocating count locks is enough because no FL_POSIX locks can be
1520          * added to the list while we hold cinode->lock_sem, which
1521          * protects the locking operations on this inode.
1522          */
1523         for (i = 0; i < count; i++) {
1524                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1525                 if (!lck) {
1526                         rc = -ENOMEM;
1527                         goto err_out;
1528                 }
1529                 list_add_tail(&lck->llist, &locks_to_send);
1530         }
1531
1532         el = locks_to_send.next;
1533         spin_lock(&flctx->flc_lock);
1534         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1535                 if (el == &locks_to_send) {
1536                         /*
1537                          * The list ended. We don't have enough allocated
1538                          * structures - something is really wrong.
1539                          */
1540                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1541                         break;
1542                 }
1543                 length = cifs_flock_len(flock);
1544                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1545                         type = CIFS_RDLCK;
1546                 else
1547                         type = CIFS_WRLCK;
1548                 lck = list_entry(el, struct lock_to_push, llist);
1549                 lck->pid = hash_lockowner(flock->fl_owner);
1550                 lck->netfid = cfile->fid.netfid;
1551                 lck->length = length;
1552                 lck->type = type;
1553                 lck->offset = flock->fl_start;
1554         }
1555         spin_unlock(&flctx->flc_lock);
1556
1557         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1558                 int stored_rc;
1559
1560                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1561                                              lck->offset, lck->length, NULL,
1562                                              lck->type, 0);
1563                 if (stored_rc)
1564                         rc = stored_rc;
1565                 list_del(&lck->llist);
1566                 kfree(lck);
1567         }
1568
1569 out:
1570         free_xid(xid);
1571         return rc;
1572 err_out:
1573         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1574                 list_del(&lck->llist);
1575                 kfree(lck);
1576         }
1577         goto out;
1578 }
1579 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1580
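/*
 * Push any byte-range locks cached under can_cache_brlcks out to the
 * server - via POSIX lock calls on unix-extension mounts, LOCKING_ANDX
 * otherwise - and stop caching further lock requests on this inode.
 */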
1581 static int
1582 cifs_push_locks(struct cifsFileInfo *cfile)
1583 {
1584         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1585         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1586         int rc = 0;
1587 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1588         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1589 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1590
1591         /* we are going to update can_cache_brlcks here - need write access */
1592         cifs_down_write(&cinode->lock_sem);
1593         if (!cinode->can_cache_brlcks) {
1594                 up_write(&cinode->lock_sem);
1595                 return rc;
1596         }
1597
1598 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1599         if (cap_unix(tcon->ses) &&
1600             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1601             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1602                 rc = cifs_push_posix_locks(cfile);
1603         else
1604 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1605                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1606
1607         cinode->can_cache_brlcks = false;
1608         up_write(&cinode->lock_sem);
1609         return rc;
1610 }
1611
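/*
 * Decode a VFS file_lock into CIFS terms: set *type to the wire lock type,
 * flag in *lock / *unlock whether this is a lock or an unlock request, and
 * note in *wait_flag whether the caller is willing to block (FL_SLEEP).
 */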
1612 static void
1613 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1614                 bool *wait_flag, struct TCP_Server_Info *server)
1615 {
1616         if (flock->fl_flags & FL_POSIX)
1617                 cifs_dbg(FYI, "Posix\n");
1618         if (flock->fl_flags & FL_FLOCK)
1619                 cifs_dbg(FYI, "Flock\n");
1620         if (flock->fl_flags & FL_SLEEP) {
1621                 cifs_dbg(FYI, "Blocking lock\n");
1622                 *wait_flag = true;
1623         }
1624         if (flock->fl_flags & FL_ACCESS)
1625                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1626         if (flock->fl_flags & FL_LEASE)
1627                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1628         if (flock->fl_flags &
1629             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1630                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1631                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1632
1633         *type = server->vals->large_lock_type;
1634         if (flock->fl_type == F_WRLCK) {
1635                 cifs_dbg(FYI, "F_WRLCK\n");
1636                 *type |= server->vals->exclusive_lock_type;
1637                 *lock = 1;
1638         } else if (flock->fl_type == F_UNLCK) {
1639                 cifs_dbg(FYI, "F_UNLCK\n");
1640                 *type |= server->vals->unlock_lock_type;
1641                 *unlock = 1;
1642                 /* Check if unlock includes more than one lock range */
1643         } else if (flock->fl_type == F_RDLCK) {
1644                 cifs_dbg(FYI, "F_RDLCK\n");
1645                 *type |= server->vals->shared_lock_type;
1646                 *lock = 1;
1647         } else if (flock->fl_type == F_EXLCK) {
1648                 cifs_dbg(FYI, "F_EXLCK\n");
1649                 *type |= server->vals->exclusive_lock_type;
1650                 *lock = 1;
1651         } else if (flock->fl_type == F_SHLCK) {
1652                 cifs_dbg(FYI, "F_SHLCK\n");
1653                 *type |= server->vals->shared_lock_type;
1654                 *lock = 1;
1655         } else
1656                 cifs_dbg(FYI, "Unknown type of lock\n");
1657 }
1658
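/*
 * Handle an F_GETLK-style query.  If no conflict is found locally, probe
 * the server by taking and immediately releasing the range; if the range
 * cannot be locked, report back the type of lock that would conflict.
 */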
1659 static int
1660 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1661            bool wait_flag, bool posix_lck, unsigned int xid)
1662 {
1663         int rc = 0;
1664         __u64 length = cifs_flock_len(flock);
1665         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1666         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1667         struct TCP_Server_Info *server = tcon->ses->server;
1668 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1669         __u16 netfid = cfile->fid.netfid;
1670
1671         if (posix_lck) {
1672                 int posix_lock_type;
1673
1674                 rc = cifs_posix_lock_test(file, flock);
1675                 if (!rc)
1676                         return rc;
1677
1678                 if (type & server->vals->shared_lock_type)
1679                         posix_lock_type = CIFS_RDLCK;
1680                 else
1681                         posix_lock_type = CIFS_WRLCK;
1682                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1683                                       hash_lockowner(flock->fl_owner),
1684                                       flock->fl_start, length, flock,
1685                                       posix_lock_type, wait_flag);
1686                 return rc;
1687         }
1688 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1689
1690         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1691         if (!rc)
1692                 return rc;
1693
1694         /* BB we could chain these into one lock request BB */
1695         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1696                                     1, 0, false);
1697         if (rc == 0) {
1698                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1699                                             type, 0, 1, false);
1700                 flock->fl_type = F_UNLCK;
1701                 if (rc != 0)
1702                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1703                                  rc);
1704                 return 0;
1705         }
1706
1707         if (type & server->vals->shared_lock_type) {
1708                 flock->fl_type = F_WRLCK;
1709                 return 0;
1710         }
1711
1712         type &= ~server->vals->exclusive_lock_type;
1713
1714         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1715                                     type | server->vals->shared_lock_type,
1716                                     1, 0, false);
1717         if (rc == 0) {
1718                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1719                         type | server->vals->shared_lock_type, 0, 1, false);
1720                 flock->fl_type = F_RDLCK;
1721                 if (rc != 0)
1722                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1723                                  rc);
1724         } else
1725                 flock->fl_type = F_WRLCK;
1726
1727         return 0;
1728 }
1729
1730 void
1731 cifs_move_llist(struct list_head *source, struct list_head *dest)
1732 {
1733         struct list_head *li, *tmp;
1734         list_for_each_safe(li, tmp, source)
1735                 list_move(li, dest);
1736 }
1737
1738 void
1739 cifs_free_llist(struct list_head *llist)
1740 {
1741         struct cifsLockInfo *li, *tmp;
1742         list_for_each_entry_safe(li, tmp, llist, llist) {
1743                 cifs_del_lock_waiters(li);
1744                 list_del(&li->llist);
1745                 kfree(li);
1746         }
1747 }
1748
1749 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
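/*
 * Unlock every cached range covered by this flock request.  Ranges being
 * unlocked on the server are parked on tmp_llist so they can be spliced
 * back onto the file's lock list if the LOCKING_ANDX unlock request fails.
 */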
1750 int
1751 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1752                   unsigned int xid)
1753 {
1754         int rc = 0, stored_rc;
1755         static const int types[] = {
1756                 LOCKING_ANDX_LARGE_FILES,
1757                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1758         };
1759         unsigned int i;
1760         unsigned int max_num, num, max_buf;
1761         LOCKING_ANDX_RANGE *buf, *cur;
1762         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1763         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1764         struct cifsLockInfo *li, *tmp;
1765         __u64 length = cifs_flock_len(flock);
1766         struct list_head tmp_llist;
1767
1768         INIT_LIST_HEAD(&tmp_llist);
1769
1770         /*
1771          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1772          * and check it before using.
1773          */
1774         max_buf = tcon->ses->server->maxBuf;
1775         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1776                 return -EINVAL;
1777
1778         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1779                      PAGE_SIZE);
1780         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1781                         PAGE_SIZE);
1782         max_num = (max_buf - sizeof(struct smb_hdr)) /
1783                                                 sizeof(LOCKING_ANDX_RANGE);
1784         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1785         if (!buf)
1786                 return -ENOMEM;
1787
1788         cifs_down_write(&cinode->lock_sem);
1789         for (i = 0; i < 2; i++) {
1790                 cur = buf;
1791                 num = 0;
1792                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1793                         if (flock->fl_start > li->offset ||
1794                             (flock->fl_start + length) <
1795                             (li->offset + li->length))
1796                                 continue;
1797                         if (current->tgid != li->pid)
1798                                 continue;
1799                         if (types[i] != li->type)
1800                                 continue;
1801                         if (cinode->can_cache_brlcks) {
1802                                 /*
1803                                  * We can cache brlock requests - simply remove
1804                                  * a lock from the file's list.
1805                                  */
1806                                 list_del(&li->llist);
1807                                 cifs_del_lock_waiters(li);
1808                                 kfree(li);
1809                                 continue;
1810                         }
1811                         cur->Pid = cpu_to_le16(li->pid);
1812                         cur->LengthLow = cpu_to_le32((u32)li->length);
1813                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1814                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1815                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1816                         /*
1817                          * We need to save a lock here to let us add it again to
1818                          * the file's list if the unlock range request fails on
1819                          * the server.
1820                          */
1821                         list_move(&li->llist, &tmp_llist);
1822                         if (++num == max_num) {
1823                                 stored_rc = cifs_lockv(xid, tcon,
1824                                                        cfile->fid.netfid,
1825                                                        li->type, num, 0, buf);
1826                                 if (stored_rc) {
1827                                         /*
1828                                          * We failed on the unlock range
1829                                          * request - add all locks from the tmp
1830                                          * list to the head of the file's list.
1831                                          */
1832                                         cifs_move_llist(&tmp_llist,
1833                                                         &cfile->llist->locks);
1834                                         rc = stored_rc;
1835                                 } else
1836                                         /*
1837                                          * The unlock range request succeeded -
1838                                          * free the tmp list.
1839                                          */
1840                                         cifs_free_llist(&tmp_llist);
1841                                 cur = buf;
1842                                 num = 0;
1843                         } else
1844                                 cur++;
1845                 }
1846                 if (num) {
1847                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1848                                                types[i], num, 0, buf);
1849                         if (stored_rc) {
1850                                 cifs_move_llist(&tmp_llist,
1851                                                 &cfile->llist->locks);
1852                                 rc = stored_rc;
1853                         } else
1854                                 cifs_free_llist(&tmp_llist);
1855                 }
1856         }
1857
1858         up_write(&cinode->lock_sem);
1859         kfree(buf);
1860         return rc;
1861 }
1862 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1863
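/*
 * Handle an F_SETLK/F_SETLKW-style request: record the lock locally while
 * brlocks can still be cached, send it to the server otherwise, or unlock
 * a previously locked range; then update the local VFS lock state for
 * POSIX and flock-style requests.
 */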
1864 static int
1865 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1866            bool wait_flag, bool posix_lck, int lock, int unlock,
1867            unsigned int xid)
1868 {
1869         int rc = 0;
1870         __u64 length = cifs_flock_len(flock);
1871         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1872         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1873         struct TCP_Server_Info *server = tcon->ses->server;
1874         struct inode *inode = d_inode(cfile->dentry);
1875
1876 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1877         if (posix_lck) {
1878                 int posix_lock_type;
1879
1880                 rc = cifs_posix_lock_set(file, flock);
1881                 if (rc <= FILE_LOCK_DEFERRED)
1882                         return rc;
1883
1884                 if (type & server->vals->shared_lock_type)
1885                         posix_lock_type = CIFS_RDLCK;
1886                 else
1887                         posix_lock_type = CIFS_WRLCK;
1888
1889                 if (unlock == 1)
1890                         posix_lock_type = CIFS_UNLCK;
1891
1892                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1893                                       hash_lockowner(flock->fl_owner),
1894                                       flock->fl_start, length,
1895                                       NULL, posix_lock_type, wait_flag);
1896                 goto out;
1897         }
1898 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1899         if (lock) {
1900                 struct cifsLockInfo *lock;
1901
1902                 lock = cifs_lock_init(flock->fl_start, length, type,
1903                                       flock->fl_flags);
1904                 if (!lock)
1905                         return -ENOMEM;
1906
1907                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1908                 if (rc < 0) {
1909                         kfree(lock);
1910                         return rc;
1911                 }
1912                 if (!rc)
1913                         goto out;
1914
1915                 /*
1916                  * Windows 7 server can delay breaking lease from read to None
1917                  * if we set a byte-range lock on a file - break it explicitly
1918                  * before sending the lock to the server to be sure the next
1919                  * read won't conflict with non-overlapping locks due to
1920                  * page reading.
1921                  */
1922                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1923                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1924                         cifs_zap_mapping(inode);
1925                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1926                                  inode);
1927                         CIFS_I(inode)->oplock = 0;
1928                 }
1929
1930                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1931                                             type, 1, 0, wait_flag);
1932                 if (rc) {
1933                         kfree(lock);
1934                         return rc;
1935                 }
1936
1937                 cifs_lock_add(cfile, lock);
1938         } else if (unlock)
1939                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1940
1941 out:
1942         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1943                 /*
1944                  * If this is a request to remove all locks because we
1945                  * are closing the file, it doesn't matter if the
1946                  * unlocking failed as both cifs.ko and the SMB server
1947                  * remove the lock on file close
1948                  */
1949                 if (rc) {
1950                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1951                         if (!(flock->fl_flags & FL_CLOSE))
1952                                 return rc;
1953                 }
1954                 rc = locks_lock_file_wait(file, flock);
1955         }
1956         return rc;
1957 }
1958
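/*
 * flock(2) entry point.  For example, a userspace call such as
 * flock(fd, LOCK_EX) on a cifs mount arrives here with FL_FLOCK set and is
 * translated into a whole-file byte-range lock on the server (or a POSIX
 * lock when the unix extensions allow it).
 */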
1959 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1960 {
1961         int rc, xid;
1962         int lock = 0, unlock = 0;
1963         bool wait_flag = false;
1964         bool posix_lck = false;
1965         struct cifs_sb_info *cifs_sb;
1966         struct cifs_tcon *tcon;
1967         struct cifsFileInfo *cfile;
1968         __u32 type;
1969
1970         xid = get_xid();
1971
1972         if (!(fl->fl_flags & FL_FLOCK)) {
1973                 rc = -ENOLCK;
1974                 free_xid(xid);
1975                 return rc;
1976         }
1977
1978         cfile = (struct cifsFileInfo *)file->private_data;
1979         tcon = tlink_tcon(cfile->tlink);
1980
1981         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1982                         tcon->ses->server);
1983         cifs_sb = CIFS_FILE_SB(file);
1984
1985         if (cap_unix(tcon->ses) &&
1986             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1987             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1988                 posix_lck = true;
1989
1990         if (!lock && !unlock) {
1991                 /*
1992                  * if this is neither a lock nor an unlock request then there
1993                  * is nothing to do since we do not know what it is
1994                  */
1995                 rc = -EOPNOTSUPP;
1996                 free_xid(xid);
1997                 return rc;
1998         }
1999
2000         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2001                         xid);
2002         free_xid(xid);
2003         return rc;
2006 }
2007
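/*
 * fcntl(2) byte-range lock entry point (F_GETLK/F_SETLK/F_SETLKW).  For
 * example, fcntl(fd, F_SETLKW, &fl) on a cifs mount reaches this via the
 * VFS with FL_POSIX set in fl_flags.
 */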
2008 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2009 {
2010         int rc, xid;
2011         int lock = 0, unlock = 0;
2012         bool wait_flag = false;
2013         bool posix_lck = false;
2014         struct cifs_sb_info *cifs_sb;
2015         struct cifs_tcon *tcon;
2016         struct cifsFileInfo *cfile;
2017         __u32 type;
2018
2019         rc = -EACCES;
2020         xid = get_xid();
2021
2022         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
2023                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2024                  (long long)flock->fl_end);
2025
2026         cfile = (struct cifsFileInfo *)file->private_data;
2027         tcon = tlink_tcon(cfile->tlink);
2028
2029         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2030                         tcon->ses->server);
2031         cifs_sb = CIFS_FILE_SB(file);
2032         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2033
2034         if (cap_unix(tcon->ses) &&
2035             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2036             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2037                 posix_lck = true;
2038         /*
2039          * BB add code here to normalize offset and length to account for
2040          * negative length, which we cannot accept over the wire.
2041          */
2042         if (IS_GETLK(cmd)) {
2043                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2044                 free_xid(xid);
2045                 return rc;
2046         }
2047
2048         if (!lock && !unlock) {
2049                 /*
2050                  * if this is neither a lock nor an unlock request then there
2051                  * is nothing to do since we do not know what it is
2052                  */
2053                 free_xid(xid);
2054                 return -EOPNOTSUPP;
2055         }
2056
2057         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2058                         xid);
2059         free_xid(xid);
2060         return rc;
2061 }
2062
2063 /*
2064  * Update the file size (if needed) after a write. Should be called with
2065  * the inode->i_lock held.
2066  */
2067 void
2068 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2069                       unsigned int bytes_written)
2070 {
2071         loff_t end_of_write = offset + bytes_written;
2072
2073         if (end_of_write > cifsi->server_eof)
2074                 cifsi->server_eof = end_of_write;
2075 }
2076
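/*
 * Synchronously write out a buffer at *offset, in chunks no larger than
 * the server's retry size, reopening an invalidated handle and retrying on
 * -EAGAIN; on success, advance *offset and update the cached EOF and the
 * inode size.
 */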
2077 static ssize_t
2078 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2079            size_t write_size, loff_t *offset)
2080 {
2081         int rc = 0;
2082         unsigned int bytes_written = 0;
2083         unsigned int total_written;
2084         struct cifs_tcon *tcon;
2085         struct TCP_Server_Info *server;
2086         unsigned int xid;
2087         struct dentry *dentry = open_file->dentry;
2088         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2089         struct cifs_io_parms io_parms = {0};
2090
2091         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2092                  write_size, *offset, dentry);
2093
2094         tcon = tlink_tcon(open_file->tlink);
2095         server = tcon->ses->server;
2096
2097         if (!server->ops->sync_write)
2098                 return -ENOSYS;
2099
2100         xid = get_xid();
2101
2102         for (total_written = 0; write_size > total_written;
2103              total_written += bytes_written) {
2104                 rc = -EAGAIN;
2105                 while (rc == -EAGAIN) {
2106                         struct kvec iov[2];
2107                         unsigned int len;
2108
2109                         if (open_file->invalidHandle) {
2110                                 /* we could deadlock if we called
2111                                    filemap_fdatawait from here so tell
2112                                    reopen_file not to flush data to
2113                                    server now */
2114                                 rc = cifs_reopen_file(open_file, false);
2115                                 if (rc != 0)
2116                                         break;
2117                         }
2118
2119                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2120                                   (unsigned int)write_size - total_written);
2121                         /* iov[0] is reserved for smb header */
2122                         iov[1].iov_base = (char *)write_data + total_written;
2123                         iov[1].iov_len = len;
2124                         io_parms.pid = pid;
2125                         io_parms.tcon = tcon;
2126                         io_parms.offset = *offset;
2127                         io_parms.length = len;
2128                         rc = server->ops->sync_write(xid, &open_file->fid,
2129                                         &io_parms, &bytes_written, iov, 1);
2130                 }
2131                 if (rc || (bytes_written == 0)) {
2132                         if (total_written)
2133                                 break;
2134                         else {
2135                                 free_xid(xid);
2136                                 return rc;
2137                         }
2138                 } else {
2139                         spin_lock(&d_inode(dentry)->i_lock);
2140                         cifs_update_eof(cifsi, *offset, bytes_written);
2141                         spin_unlock(&d_inode(dentry)->i_lock);
2142                         *offset += bytes_written;
2143                 }
2144         }
2145
2146         cifs_stats_bytes_written(tcon, total_written);
2147
2148         if (total_written > 0) {
2149                 spin_lock(&d_inode(dentry)->i_lock);
2150                 if (*offset > d_inode(dentry)->i_size) {
2151                         i_size_write(d_inode(dentry), *offset);
2152                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2153                 }
2154                 spin_unlock(&d_inode(dentry)->i_lock);
2155         }
2156         mark_inode_dirty_sync(d_inode(dentry));
2157         free_xid(xid);
2158         return total_written;
2159 }
2160
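/*
 * Find an already-open handle on the inode that allows reads, take a
 * reference on it, and return it; returns NULL if there is none.
 */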
2161 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2162                                         bool fsuid_only)
2163 {
2164         struct cifsFileInfo *open_file = NULL;
2165         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2166
2167         /* only filter by fsuid on multiuser mounts */
2168         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2169                 fsuid_only = false;
2170
2171         spin_lock(&cifs_inode->open_file_lock);
2172         /* we could simply take the first list entry since write-only entries
2173            are always at the end of the list, but the first entry might
2174            have a close pending, so we go through the whole list */
2175         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2176                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2177                         continue;
2178                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2179                         if (!open_file->invalidHandle) {
2180                                 /* found a good file */
2181                                 /* lock it so it will not be closed on us */
2182                                 cifsFileInfo_get(open_file);
2183                                 spin_unlock(&cifs_inode->open_file_lock);
2184                                 return open_file;
2185                         } /* else might as well continue, and look for
2186                              another, or simply have the caller reopen it
2187                              again rather than trying to fix this handle */
2188                 } else /* write only file */
2189                         break; /* write only files are last so must be done */
2190         }
2191         spin_unlock(&cifs_inode->open_file_lock);
2192         return NULL;
2193 }
2194
2195 /* Return -EBADF if no handle is found, or a general rc otherwise */
2196 int
2197 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2198                        struct cifsFileInfo **ret_file)
2199 {
2200         struct cifsFileInfo *open_file, *inv_file = NULL;
2201         struct cifs_sb_info *cifs_sb;
2202         bool any_available = false;
2203         int rc = -EBADF;
2204         unsigned int refind = 0;
2205         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2206         bool with_delete = flags & FIND_WR_WITH_DELETE;
2207         *ret_file = NULL;
2208
2209         /*
2210          * Having a null inode here (because mapping->host was set to zero by
2211          * the VFS or MM) should not happen, but we had reports of an oops (due
2212          * to it being zero) during stress test cases, so we need to check for it
2213          */
2214
2215         if (cifs_inode == NULL) {
2216                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2217                 dump_stack();
2218                 return rc;
2219         }
2220
2221         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2222
2223         /* only filter by fsuid on multiuser mounts */
2224         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2225                 fsuid_only = false;
2226
2227         spin_lock(&cifs_inode->open_file_lock);
2228 refind_writable:
2229         if (refind > MAX_REOPEN_ATT) {
2230                 spin_unlock(&cifs_inode->open_file_lock);
2231                 return rc;
2232         }
2233         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2234                 if (!any_available && open_file->pid != current->tgid)
2235                         continue;
2236                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2237                         continue;
2238                 if (with_delete && !(open_file->fid.access & DELETE))
2239                         continue;
2240                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2241                         if (!open_file->invalidHandle) {
2242                                 /* found a good writable file */
2243                                 cifsFileInfo_get(open_file);
2244                                 spin_unlock(&cifs_inode->open_file_lock);
2245                                 *ret_file = open_file;
2246                                 return 0;
2247                         } else {
2248                                 if (!inv_file)
2249                                         inv_file = open_file;
2250                         }
2251                 }
2252         }
2253         /* couldn't find a usable FH with the same pid, try any available */
2254         if (!any_available) {
2255                 any_available = true;
2256                 goto refind_writable;
2257         }
2258
2259         if (inv_file) {
2260                 any_available = false;
2261                 cifsFileInfo_get(inv_file);
2262         }
2263
2264         spin_unlock(&cifs_inode->open_file_lock);
2265
2266         if (inv_file) {
2267                 rc = cifs_reopen_file(inv_file, false);
2268                 if (!rc) {
2269                         *ret_file = inv_file;
2270                         return 0;
2271                 }
2272
2273                 spin_lock(&cifs_inode->open_file_lock);
2274                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2275                 spin_unlock(&cifs_inode->open_file_lock);
2276                 cifsFileInfo_put(inv_file);
2277                 ++refind;
2278                 inv_file = NULL;
2279                 spin_lock(&cifs_inode->open_file_lock);
2280                 goto refind_writable;
2281         }
2282
2283         return rc;
2284 }
2285
2286 struct cifsFileInfo *
2287 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2288 {
2289         struct cifsFileInfo *cfile;
2290         int rc;
2291
2292         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2293         if (rc)
2294                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2295
2296         return cfile;
2297 }
2298
2299 int
2300 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2301                        int flags,
2302                        struct cifsFileInfo **ret_file)
2303 {
2304         struct cifsFileInfo *cfile;
2305         void *page = alloc_dentry_path();
2306
2307         *ret_file = NULL;
2308
2309         spin_lock(&tcon->open_file_lock);
2310         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2311                 struct cifsInodeInfo *cinode;
2312                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2313                 if (IS_ERR(full_path)) {
2314                         spin_unlock(&tcon->open_file_lock);
2315                         free_dentry_path(page);
2316                         return PTR_ERR(full_path);
2317                 }
2318                 if (strcmp(full_path, name))
2319                         continue;
2320
2321                 cinode = CIFS_I(d_inode(cfile->dentry));
2322                 spin_unlock(&tcon->open_file_lock);
2323                 free_dentry_path(page);
2324                 return cifs_get_writable_file(cinode, flags, ret_file);
2325         }
2326
2327         spin_unlock(&tcon->open_file_lock);
2328         free_dentry_path(page);
2329         return -ENOENT;
2330 }
2331
2332 int
2333 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2334                        struct cifsFileInfo **ret_file)
2335 {
2336         struct cifsFileInfo *cfile;
2337         void *page = alloc_dentry_path();
2338
2339         *ret_file = NULL;
2340
2341         spin_lock(&tcon->open_file_lock);
2342         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2343                 struct cifsInodeInfo *cinode;
2344                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2345                 if (IS_ERR(full_path)) {
2346                         spin_unlock(&tcon->open_file_lock);
2347                         free_dentry_path(page);
2348                         return PTR_ERR(full_path);
2349                 }
2350                 if (strcmp(full_path, name))
2351                         continue;
2352
2353                 cinode = CIFS_I(d_inode(cfile->dentry));
2354                 spin_unlock(&tcon->open_file_lock);
2355                 free_dentry_path(page);
2356                 *ret_file = find_readable_file(cinode, 0);
2357                 return *ret_file ? 0 : -ENOENT;
2358         }
2359
2360         spin_unlock(&tcon->open_file_lock);
2361         free_dentry_path(page);
2362         return -ENOENT;
2363 }
2364
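/*
 * Final kref release for a cifs_writedata: drop the SMB-Direct memory
 * registration (if any) and the file reference, then free the page array
 * and the structure itself.
 */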
2365 void
2366 cifs_writedata_release(struct kref *refcount)
2367 {
2368         struct cifs_writedata *wdata = container_of(refcount,
2369                                         struct cifs_writedata, refcount);
2370 #ifdef CONFIG_CIFS_SMB_DIRECT
2371         if (wdata->mr) {
2372                 smbd_deregister_mr(wdata->mr);
2373                 wdata->mr = NULL;
2374         }
2375 #endif
2376
2377         if (wdata->cfile)
2378                 cifsFileInfo_put(wdata->cfile);
2379
2380         kvfree(wdata->pages);
2381         kfree(wdata);
2382 }
2383
2384 /*
2385  * Write failed with a retryable error. Resend the write request. It's also
2386  * possible that the page was redirtied so re-clean the page.
2387  */
2388 static void
2389 cifs_writev_requeue(struct cifs_writedata *wdata)
2390 {
2391         int i, rc = 0;
2392         struct inode *inode = d_inode(wdata->cfile->dentry);
2393         struct TCP_Server_Info *server;
2394         unsigned int rest_len;
2395
2396         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2397         i = 0;
2398         rest_len = wdata->bytes;
2399         do {
2400                 struct cifs_writedata *wdata2;
2401                 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2402
2403                 wsize = server->ops->wp_retry_size(inode);
2404                 if (wsize < rest_len) {
2405                         nr_pages = wsize / PAGE_SIZE;
2406                         if (!nr_pages) {
2407                                 rc = -EOPNOTSUPP;
2408                                 break;
2409                         }
2410                         cur_len = nr_pages * PAGE_SIZE;
2411                         tailsz = PAGE_SIZE;
2412                 } else {
2413                         nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2414                         cur_len = rest_len;
2415                         tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2416                 }
2417
2418                 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2419                 if (!wdata2) {
2420                         rc = -ENOMEM;
2421                         break;
2422                 }
2423
2424                 for (j = 0; j < nr_pages; j++) {
2425                         wdata2->pages[j] = wdata->pages[i + j];
2426                         lock_page(wdata2->pages[j]);
2427                         clear_page_dirty_for_io(wdata2->pages[j]);
2428                 }
2429
2430                 wdata2->sync_mode = wdata->sync_mode;
2431                 wdata2->nr_pages = nr_pages;
2432                 wdata2->offset = page_offset(wdata2->pages[0]);
2433                 wdata2->pagesz = PAGE_SIZE;
2434                 wdata2->tailsz = tailsz;
2435                 wdata2->bytes = cur_len;
2436
2437                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2438                                             &wdata2->cfile);
2439                 if (!wdata2->cfile) {
2440                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2441                                  rc);
2442                         if (!is_retryable_error(rc))
2443                                 rc = -EBADF;
2444                 } else {
2445                         wdata2->pid = wdata2->cfile->pid;
2446                         rc = server->ops->async_writev(wdata2,
2447                                                        cifs_writedata_release);
2448                 }
2449
2450                 for (j = 0; j < nr_pages; j++) {
2451                         unlock_page(wdata2->pages[j]);
2452                         if (rc != 0 && !is_retryable_error(rc)) {
2453                                 SetPageError(wdata2->pages[j]);
2454                                 end_page_writeback(wdata2->pages[j]);
2455                                 put_page(wdata2->pages[j]);
2456                         }
2457                 }
2458
2459                 kref_put(&wdata2->refcount, cifs_writedata_release);
2460                 if (rc) {
2461                         if (is_retryable_error(rc))
2462                                 continue;
2463                         i += nr_pages;
2464                         break;
2465                 }
2466
2467                 rest_len -= cur_len;
2468                 i += nr_pages;
2469         } while (i < wdata->nr_pages);
2470
2471         /* cleanup remaining pages from the original wdata */
2472         for (; i < wdata->nr_pages; i++) {
2473                 SetPageError(wdata->pages[i]);
2474                 end_page_writeback(wdata->pages[i]);
2475                 put_page(wdata->pages[i]);
2476         }
2477
2478         if (rc != 0 && !is_retryable_error(rc))
2479                 mapping_set_error(inode->i_mapping, rc);
2480         kref_put(&wdata->refcount, cifs_writedata_release);
2481 }
2482
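/*
 * Work item run when an async write completes: update the cached EOF and
 * stats on success, requeue on -EAGAIN for WB_SYNC_ALL writeback, and
 * otherwise end writeback on each page, redirtying or marking it errored
 * as appropriate.
 */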
2483 void
2484 cifs_writev_complete(struct work_struct *work)
2485 {
2486         struct cifs_writedata *wdata = container_of(work,
2487                                                 struct cifs_writedata, work);
2488         struct inode *inode = d_inode(wdata->cfile->dentry);
2489         int i = 0;
2490
2491         if (wdata->result == 0) {
2492                 spin_lock(&inode->i_lock);
2493                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2494                 spin_unlock(&inode->i_lock);
2495                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2496                                          wdata->bytes);
2497         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2498                 return cifs_writev_requeue(wdata);
2499
2500         for (i = 0; i < wdata->nr_pages; i++) {
2501                 struct page *page = wdata->pages[i];
2502
2503                 if (wdata->result == -EAGAIN)
2504                         __set_page_dirty_nobuffers(page);
2505                 else if (wdata->result < 0)
2506                         SetPageError(page);
2507                 end_page_writeback(page);
2508                 cifs_readpage_to_fscache(inode, page);
2509                 put_page(page);
2510         }
2511         if (wdata->result != -EAGAIN)
2512                 mapping_set_error(inode->i_mapping, wdata->result);
2513         kref_put(&wdata->refcount, cifs_writedata_release);
2514 }
2515
2516 struct cifs_writedata *
2517 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2518 {
2519         struct cifs_writedata *writedata = NULL;
2520         struct page **pages =
2521                 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2522         if (pages) {
2523                 writedata = cifs_writedata_direct_alloc(pages, complete);
2524                 if (!writedata)
2525                         kvfree(pages);
2526         }
2527
2528         return writedata;
2529 }
2530
2531 struct cifs_writedata *
2532 cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2533 {
2534         struct cifs_writedata *wdata;
2535
2536         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2537         if (wdata != NULL) {
2538                 wdata->pages = pages;
2539                 kref_init(&wdata->refcount);
2540                 INIT_LIST_HEAD(&wdata->list);
2541                 init_completion(&wdata->done);
2542                 INIT_WORK(&wdata->work, complete);
2543         }
2544         return wdata;
2545 }
2546
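/*
 * Synchronously write the byte range [from, to) of a page back to the
 * server, using any writable handle on the inode; the range is clamped so
 * the write never extends the file.
 */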
2548 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2549 {
2550         struct address_space *mapping = page->mapping;
2551         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2552         char *write_data;
2553         int rc = -EFAULT;
2554         int bytes_written = 0;
2555         struct inode *inode;
2556         struct cifsFileInfo *open_file;
2557
2558         if (!mapping || !mapping->host)
2559                 return -EFAULT;
2560
2561         inode = page->mapping->host;
2562
2563         offset += (loff_t)from;
2564         write_data = kmap(page);
2565         write_data += from;
2566
2567         if ((to > PAGE_SIZE) || (from > to)) {
2568                 kunmap(page);
2569                 return -EIO;
2570         }
2571
2572         /* racing with truncate? */
2573         if (offset > mapping->host->i_size) {
2574                 kunmap(page);
2575                 return 0; /* don't care */
2576         }
2577
2578         /* check to make sure that we are not extending the file */
2579         if (mapping->host->i_size - offset < (loff_t)to)
2580                 to = (unsigned)(mapping->host->i_size - offset);
2581
2582         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2583                                     &open_file);
2584         if (!rc) {
2585                 bytes_written = cifs_write(open_file, open_file->pid,
2586                                            write_data, to - from, &offset);
2587                 cifsFileInfo_put(open_file);
2588                 /* Does mm or vfs already set times? */
2589                 inode->i_atime = inode->i_mtime = current_time(inode);
2590                 if ((bytes_written > 0) && (offset))
2591                         rc = 0;
2592                 else if (bytes_written < 0)
2593                         rc = bytes_written;
2594                 else
2595                         rc = -EFAULT;
2596         } else {
2597                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2598                 if (!is_retryable_error(rc))
2599                         rc = -EIO;
2600         }
2601
2602         kunmap(page);
2603         return rc;
2604 }
2605
2606 static struct cifs_writedata *
2607 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2608                           pgoff_t end, pgoff_t *index,
2609                           unsigned int *found_pages)
2610 {
2611         struct cifs_writedata *wdata;
2612
2613         wdata = cifs_writedata_alloc((unsigned int)tofind,
2614                                      cifs_writev_complete);
2615         if (!wdata)
2616                 return NULL;
2617
2618         *found_pages = find_get_pages_range_tag(mapping, index, end,
2619                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2620         return wdata;
2621 }
2622
2623 static unsigned int
2624 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2625                     struct address_space *mapping,
2626                     struct writeback_control *wbc,
2627                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2628 {
2629         unsigned int nr_pages = 0, i;
2630         struct page *page;
2631
2632         for (i = 0; i < found_pages; i++) {
2633                 page = wdata->pages[i];
2634                 /*
2635                  * At this point we hold neither the i_pages lock nor the
2636                  * page lock: the page may be truncated or invalidated
2637                  * (changing page->mapping to NULL), or even swizzled
2638                  * back from swapper_space to tmpfs file mapping
2639                  */
2640
2641                 if (nr_pages == 0)
2642                         lock_page(page);
2643                 else if (!trylock_page(page))
2644                         break;
2645
2646                 if (unlikely(page->mapping != mapping)) {
2647                         unlock_page(page);
2648                         break;
2649                 }
2650
2651                 if (!wbc->range_cyclic && page->index > end) {
2652                         *done = true;
2653                         unlock_page(page);
2654                         break;
2655                 }
2656
2657                 if (*next && (page->index != *next)) {
2658                         /* Not next consecutive page */
2659                         unlock_page(page);
2660                         break;
2661                 }
2662
2663                 if (wbc->sync_mode != WB_SYNC_NONE)
2664                         wait_on_page_writeback(page);
2665
2666                 if (PageWriteback(page) ||
2667                                 !clear_page_dirty_for_io(page)) {
2668                         unlock_page(page);
2669                         break;
2670                 }
2671
2672                 /*
2673                  * This actually clears the dirty bit in the radix tree.
2674                  * See cifs_writepage() for more commentary.
2675                  */
2676                 set_page_writeback(page);
2677                 if (page_offset(page) >= i_size_read(mapping->host)) {
2678                         *done = true;
2679                         unlock_page(page);
2680                         end_page_writeback(page);
2681                         break;
2682                 }
2683
2684                 wdata->pages[i] = page;
2685                 *next = page->index + 1;
2686                 ++nr_pages;
2687         }
2688
2689         /* reset index to refind any pages skipped */
2690         if (nr_pages == 0)
2691                 *index = wdata->pages[0]->index + 1;
2692
2693         /* put any pages we aren't going to use */
2694         for (i = nr_pages; i < found_pages; i++) {
2695                 put_page(wdata->pages[i]);
2696                 wdata->pages[i] = NULL;
2697         }
2698
2699         return nr_pages;
2700 }
2701
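/*
 * Fill in the remaining wdata fields for a prepared run of pages and issue
 * the async write, after trimming the reserved credits down to the actual
 * request size.
 */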
2702 static int
2703 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2704                  struct address_space *mapping, struct writeback_control *wbc)
2705 {
2706         int rc;
2707
2708         wdata->sync_mode = wbc->sync_mode;
2709         wdata->nr_pages = nr_pages;
2710         wdata->offset = page_offset(wdata->pages[0]);
2711         wdata->pagesz = PAGE_SIZE;
2712         wdata->tailsz = min(i_size_read(mapping->host) -
2713                         page_offset(wdata->pages[nr_pages - 1]),
2714                         (loff_t)PAGE_SIZE);
2715         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2716         wdata->pid = wdata->cfile->pid;
2717
2718         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2719         if (rc)
2720                 return rc;
2721
2722         if (wdata->cfile->invalidHandle)
2723                 rc = -EAGAIN;
2724         else
2725                 rc = wdata->server->ops->async_writev(wdata,
2726                                                       cifs_writedata_release);
2727
2728         return rc;
2729 }
2730
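/*
 * address_space_operations->writepages for cifs: gather runs of dirty,
 * contiguous pages up to wsize, wrap each run in a cifs_writedata, and
 * send it as an asynchronous write, retrying retryable errors.
 */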
2731 static int cifs_writepages(struct address_space *mapping,
2732                            struct writeback_control *wbc)
2733 {
2734         struct inode *inode = mapping->host;
2735         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2736         struct TCP_Server_Info *server;
2737         bool done = false, scanned = false, range_whole = false;
2738         pgoff_t end, index;
2739         struct cifs_writedata *wdata;
2740         struct cifsFileInfo *cfile = NULL;
2741         int rc = 0;
2742         int saved_rc = 0;
2743         unsigned int xid;
2744
2745         /*
2746          * If wsize is smaller than the page cache size, default to writing
2747          * one page at a time via cifs_writepage
2748          */
2749         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2750                 return generic_writepages(mapping, wbc);
2751
2752         xid = get_xid();
2753         if (wbc->range_cyclic) {
2754                 index = mapping->writeback_index; /* Start from prev offset */
2755                 end = -1;
2756         } else {
2757                 index = wbc->range_start >> PAGE_SHIFT;
2758                 end = wbc->range_end >> PAGE_SHIFT;
2759                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2760                         range_whole = true;
2761                 scanned = true;
2762         }
2763         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2764
2765 retry:
2766         while (!done && index <= end) {
2767                 unsigned int i, nr_pages, found_pages, wsize;
2768                 pgoff_t next = 0, tofind, saved_index = index;
2769                 struct cifs_credits credits_on_stack;
2770                 struct cifs_credits *credits = &credits_on_stack;
2771                 int get_file_rc = 0;
2772
2773                 if (cfile)
2774                         cifsFileInfo_put(cfile);
2775
2776                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2777
2778                 /* in case of an error store it to return later */
2779                 if (rc)
2780                         get_file_rc = rc;
2781
2782                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2783                                                    &wsize, credits);
2784                 if (rc != 0) {
2785                         done = true;
2786                         break;
2787                 }
2788
2789                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
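                /*
                 * Illustrative arithmetic: with wsize = 64K and 4K pages,
                 * tofind = min(16 - 1, end - index) + 1, i.e. at most 16
                 * pages are gathered per batch.
                 */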
2790
2791                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2792                                                   &found_pages);
2793                 if (!wdata) {
2794                         rc = -ENOMEM;
2795                         done = true;
2796                         add_credits_and_wake_if(server, credits, 0);
2797                         break;
2798                 }
2799
2800                 if (found_pages == 0) {
2801                         kref_put(&wdata->refcount, cifs_writedata_release);
2802                         add_credits_and_wake_if(server, credits, 0);
2803                         break;
2804                 }
2805
2806                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2807                                                end, &index, &next, &done);
2808
2809                 /* nothing to write? */
2810                 if (nr_pages == 0) {
2811                         kref_put(&wdata->refcount, cifs_writedata_release);
2812                         add_credits_and_wake_if(server, credits, 0);
2813                         continue;
2814                 }
2815
2816                 wdata->credits = credits_on_stack;
2817                 wdata->cfile = cfile;
2818                 wdata->server = server;
2819                 cfile = NULL;
2820
2821                 if (!wdata->cfile) {
2822                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2823                                  get_file_rc);
2824                         if (is_retryable_error(get_file_rc))
2825                                 rc = get_file_rc;
2826                         else
2827                                 rc = -EBADF;
2828                 } else
2829                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2830
2831                 for (i = 0; i < nr_pages; ++i)
2832                         unlock_page(wdata->pages[i]);
2833
2834                 /* send failure -- clean up the mess */
2835                 if (rc != 0) {
2836                         add_credits_and_wake_if(server, &wdata->credits, 0);
2837                         for (i = 0; i < nr_pages; ++i) {
2838                                 if (is_retryable_error(rc))
2839                                         redirty_page_for_writepage(wbc,
2840                                                            wdata->pages[i]);
2841                                 else
2842                                         SetPageError(wdata->pages[i]);
2843                                 end_page_writeback(wdata->pages[i]);
2844                                 put_page(wdata->pages[i]);
2845                         }
2846                         if (!is_retryable_error(rc))
2847                                 mapping_set_error(mapping, rc);
2848                 }
2849                 kref_put(&wdata->refcount, cifs_writedata_release);
2850
2851                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2852                         index = saved_index;
2853                         continue;
2854                 }
2855
2856                 /* Return immediately if we received a signal during writing */
2857                 if (is_interrupt_error(rc)) {
2858                         done = true;
2859                         break;
2860                 }
2861
2862                 if (rc != 0 && saved_rc == 0)
2863                         saved_rc = rc;
2864
2865                 wbc->nr_to_write -= nr_pages;
2866                 if (wbc->nr_to_write <= 0)
2867                         done = true;
2868
2869                 index = next;
2870         }
2871
2872         if (!scanned && !done) {
2873                 /*
2874                  * We hit the last page and there is more work to be done: wrap
2875                  * back to the start of the file
2876                  */
2877                 scanned = true;
2878                 index = 0;
2879                 goto retry;
2880         }
2881
2882         if (saved_rc != 0)
2883                 rc = saved_rc;
2884
2885         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2886                 mapping->writeback_index = index;
2887
2888         if (cfile)
2889                 cifsFileInfo_put(cfile);
2890         free_xid(xid);
2891         /* Indication to update ctime and mtime as close is deferred */
2892         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2893         return rc;
2894 }
2895
2896 static int
2897 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2898 {
2899         int rc;
2900         unsigned int xid;
2901
2902         xid = get_xid();
2903 /* BB add check for wbc flags */
2904         get_page(page);
2905         if (!PageUptodate(page))
2906                 cifs_dbg(FYI, "ppw - page not up to date\n");
2907
2908         /*
2909          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2910          *
2911          * A writepage() implementation always needs to do either this,
2912          * or re-dirty the page with "redirty_page_for_writepage()" in
2913          * the case of a failure.
2914          *
2915          * Just unlocking the page would leave the radix tree tag bits
2916          * out of sync with the actual state of the page.
2917          */
2918         set_page_writeback(page);
2919 retry_write:
2920         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2921         if (is_retryable_error(rc)) {
2922                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2923                         goto retry_write;
2924                 redirty_page_for_writepage(wbc, page);
2925         } else if (rc != 0) {
2926                 SetPageError(page);
2927                 mapping_set_error(page->mapping, rc);
2928         } else {
2929                 SetPageUptodate(page);
2930         }
2931         end_page_writeback(page);
2932         put_page(page);
2933         free_xid(xid);
2934         return rc;
2935 }
2936
2937 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2938 {
2939         int rc = cifs_writepage_locked(page, wbc);
2940         unlock_page(page);
2941         return rc;
2942 }
2943
2944 static int cifs_write_end(struct file *file, struct address_space *mapping,
2945                         loff_t pos, unsigned len, unsigned copied,
2946                         struct page *page, void *fsdata)
2947 {
2948         int rc;
2949         struct inode *inode = mapping->host;
2950         struct cifsFileInfo *cfile = file->private_data;
2951         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2952         __u32 pid;
2953
2954         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2955                 pid = cfile->pid;
2956         else
2957                 pid = current->tgid;
2958
2959         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2960                  page, pos, copied);
2961
2962         if (PageChecked(page)) {
2963                 if (copied == len)
2964                         SetPageUptodate(page);
2965                 ClearPageChecked(page);
2966         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2967                 SetPageUptodate(page);
2968
2969         if (!PageUptodate(page)) {
2970                 char *page_data;
2971                 unsigned offset = pos & (PAGE_SIZE - 1);
2972                 unsigned int xid;
2973
2974                 xid = get_xid();
2975                 /* this is probably better than directly calling
2976                    cifs_partialpagewrite(), since here the file handle
2977                    is already known and we might as well leverage it */
2978                 /* BB check if anything else is missing out of ppw,
2979                    such as updating the last write time */
2980                 page_data = kmap(page);
2981                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2982                 /* if (rc < 0) should we set writebehind rc? */
2983                 kunmap(page);
2984
2985                 free_xid(xid);
2986         } else {
2987                 rc = copied;
2988                 pos += copied;
2989                 set_page_dirty(page);
2990         }
2991
2992         if (rc > 0) {
2993                 spin_lock(&inode->i_lock);
2994                 if (pos > inode->i_size) {
2995                         i_size_write(inode, pos);
2996                         inode->i_blocks = (512 - 1 + pos) >> 9;
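                        /*
                         * i_blocks counts 512-byte sectors; (512 - 1 + pos) >> 9
                         * rounds up, e.g. (illustrative) pos = 1000 -> 2 sectors.
                         */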
2997                 }
2998                 spin_unlock(&inode->i_lock);
2999         }
3000
3001         unlock_page(page);
3002         put_page(page);
3003         /* Indication to update ctime and mtime as close is deferred */
3004         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3005
3006         return rc;
3007 }
3008
3009 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3010                       int datasync)
3011 {
3012         unsigned int xid;
3013         int rc = 0;
3014         struct cifs_tcon *tcon;
3015         struct TCP_Server_Info *server;
3016         struct cifsFileInfo *smbfile = file->private_data;
3017         struct inode *inode = file_inode(file);
3018         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3019
3020         rc = file_write_and_wait_range(file, start, end);
3021         if (rc) {
3022                 trace_cifs_fsync_err(inode->i_ino, rc);
3023                 return rc;
3024         }
3025
3026         xid = get_xid();
3027
3028         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3029                  file, datasync);
3030
3031         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3032                 rc = cifs_zap_mapping(inode);
3033                 if (rc) {
3034                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3035                         rc = 0; /* don't care about it in fsync */
3036                 }
3037         }
3038
3039         tcon = tlink_tcon(smbfile->tlink);
3040         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3041                 server = tcon->ses->server;
3042                 if (server->ops->flush == NULL) {
3043                         rc = -ENOSYS;
3044                         goto strict_fsync_exit;
3045                 }
3046
3047                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3048                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3049                         if (smbfile) {
3050                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3051                                 cifsFileInfo_put(smbfile);
3052                         } else
3053                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3054                 } else
3055                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3056         }
3057
3058 strict_fsync_exit:
3059         free_xid(xid);
3060         return rc;
3061 }
3062
3063 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3064 {
3065         unsigned int xid;
3066         int rc = 0;
3067         struct cifs_tcon *tcon;
3068         struct TCP_Server_Info *server;
3069         struct cifsFileInfo *smbfile = file->private_data;
3070         struct inode *inode = file_inode(file);
3071         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3072
3073         rc = file_write_and_wait_range(file, start, end);
3074         if (rc) {
3075                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3076                 return rc;
3077         }
3078
3079         xid = get_xid();
3080
3081         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3082                  file, datasync);
3083
3084         tcon = tlink_tcon(smbfile->tlink);
3085         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3086                 server = tcon->ses->server;
3087                 if (server->ops->flush == NULL) {
3088                         rc = -ENOSYS;
3089                         goto fsync_exit;
3090                 }
3091
3092                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3093                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3094                         if (smbfile) {
3095                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3096                                 cifsFileInfo_put(smbfile);
3097                         } else
3098                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3099                 } else
3100                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3101         }
3102
3103 fsync_exit:
3104         free_xid(xid);
3105         return rc;
3106 }
3107
3108 /*
3109  * As the file closes, flush all cached write data for this inode and
3110  * check for write-behind errors.
3111  */
3112 int cifs_flush(struct file *file, fl_owner_t id)
3113 {
3114         struct inode *inode = file_inode(file);
3115         int rc = 0;
3116
3117         if (file->f_mode & FMODE_WRITE)
3118                 rc = filemap_write_and_wait(inode->i_mapping);
3119
3120         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3121         if (rc) {
3122                 /* get more nuanced writeback errors */
3123                 rc = filemap_check_wb_err(file->f_mapping, 0);
3124                 trace_cifs_flush_err(inode->i_ino, rc);
3125         }
3126         return rc;
3127 }
3128
3129 static int
3130 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3131 {
3132         int rc = 0;
3133         unsigned long i;
3134
3135         for (i = 0; i < num_pages; i++) {
3136                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3137                 if (!pages[i]) {
3138                         /*
3139                          * save the number of pages already allocated so
3140                          * only those are freed below, and return -ENOMEM
3141                          */
3142                         num_pages = i;
3143                         rc = -ENOMEM;
3144                         break;
3145                 }
3146         }
3147
3148         if (rc) {
3149                 for (i = 0; i < num_pages; i++)
3150                         put_page(pages[i]);
3151         }
3152         return rc;
3153 }
3154
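/*
 * Worked example (illustrative, 4K pages): get_numpages(64K, 100K, &clen)
 * clamps clen to 64K and returns 16; a follow-up call with the remaining
 * 36K returns 9.
 */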
3155 static inline
3156 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3157 {
3158         size_t num_pages;
3159         size_t clen;
3160
3161         clen = min_t(const size_t, len, wsize);
3162         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3163
3164         if (cur_len)
3165                 *cur_len = clen;
3166
3167         return num_pages;
3168 }
3169
3170 static void
3171 cifs_uncached_writedata_release(struct kref *refcount)
3172 {
3173         int i;
3174         struct cifs_writedata *wdata = container_of(refcount,
3175                                         struct cifs_writedata, refcount);
3176
3177         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3178         for (i = 0; i < wdata->nr_pages; i++)
3179                 put_page(wdata->pages[i]);
3180         cifs_writedata_release(refcount);
3181 }
3182
3183 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3184
3185 static void
3186 cifs_uncached_writev_complete(struct work_struct *work)
3187 {
3188         struct cifs_writedata *wdata = container_of(work,
3189                                         struct cifs_writedata, work);
3190         struct inode *inode = d_inode(wdata->cfile->dentry);
3191         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3192
3193         spin_lock(&inode->i_lock);
3194         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3195         if (cifsi->server_eof > inode->i_size)
3196                 i_size_write(inode, cifsi->server_eof);
3197         spin_unlock(&inode->i_lock);
3198
3199         complete(&wdata->done);
3200         collect_uncached_write_data(wdata->ctx);
3201         /* the call below can possibly free the last ref to the aio ctx */
3202         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3203 }
3204
3205 static int
3206 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3207                       size_t *len, unsigned long *num_pages)
3208 {
3209         size_t save_len, copied, bytes, cur_len = *len;
3210         unsigned long i, nr_pages = *num_pages;
3211
3212         save_len = cur_len;
3213         for (i = 0; i < nr_pages; i++) {
3214                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3215                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3216                 cur_len -= copied;
3217                 /*
3218                  * If we didn't copy as much as we expected, then that
3219                  * may mean we trod into an unmapped area. Stop copying
3220                  * at that point. On the next pass through the big
3221                  * loop, we'll likely end up getting a zero-length
3222                  * write and bailing out of it.
3223                  */
3224                 if (copied < bytes)
3225                         break;
3226         }
3227         cur_len = save_len - cur_len;
3228         *len = cur_len;
3229
3230         /*
3231          * If we have no data to send, then that probably means that
3232          * the copy above failed altogether. That's most likely because
3233          * the address in the iovec was bogus. Return -EFAULT and let
3234          * the caller free anything we allocated and bail out.
3235          */
3236         if (!cur_len)
3237                 return -EFAULT;
3238
3239         /*
3240          * i + 1 now represents the number of pages we actually used in
3241          * the copy phase above (the caller frees any pages beyond it).
3242          */
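        /*
         * E.g. (illustrative): if the source iovec faults partway through the
         * second 4K page, pages 0 and 1 hold 4096 + N bytes, *len is trimmed
         * to that count, and *num_pages = 2.
         */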
3243         *num_pages = i + 1;
3244         return 0;
3245 }
3246
3247 static int
3248 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3249         struct cifs_aio_ctx *ctx)
3250 {
3251         unsigned int wsize;
3252         struct cifs_credits credits;
3253         int rc;
3254         struct TCP_Server_Info *server = wdata->server;
3255
3256         do {
3257                 if (wdata->cfile->invalidHandle) {
3258                         rc = cifs_reopen_file(wdata->cfile, false);
3259                         if (rc == -EAGAIN)
3260                                 continue;
3261                         else if (rc)
3262                                 break;
3263                 }
3264
3265
3266                 /*
3267                  * Wait for credits to resend this wdata.
3268                  * Note: we attempt to resend the whole wdata rather
3269                  * than in segments.
3270                  */
3271                 do {
3272                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3273                                                 &wsize, &credits);
3274                         if (rc)
3275                                 goto fail;
3276
3277                         if (wsize < wdata->bytes) {
3278                                 add_credits_and_wake_if(server, &credits, 0);
3279                                 msleep(1000);
3280                         }
3281                 } while (wsize < wdata->bytes);
3282                 wdata->credits = credits;
3283
3284                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3285
3286                 if (!rc) {
3287                         if (wdata->cfile->invalidHandle)
3288                                 rc = -EAGAIN;
3289                         else {
3290 #ifdef CONFIG_CIFS_SMB_DIRECT
3291                                 if (wdata->mr) {
3292                                         wdata->mr->need_invalidate = true;
3293                                         smbd_deregister_mr(wdata->mr);
3294                                         wdata->mr = NULL;
3295                                 }
3296 #endif
3297                                 rc = server->ops->async_writev(wdata,
3298                                         cifs_uncached_writedata_release);
3299                         }
3300                 }
3301
3302                 /* If the write was successfully sent, we are done */
3303                 if (!rc) {
3304                         list_add_tail(&wdata->list, wdata_list);
3305                         return 0;
3306                 }
3307
3308                 /* Roll back credits and retry if needed */
3309                 add_credits_and_wake_if(server, &wdata->credits, 0);
3310         } while (rc == -EAGAIN);
3311
3312 fail:
3313         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3314         return rc;
3315 }
3316
3317 static int
3318 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3319                      struct cifsFileInfo *open_file,
3320                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3321                      struct cifs_aio_ctx *ctx)
3322 {
3323         int rc = 0;
3324         size_t cur_len;
3325         unsigned long nr_pages, num_pages, i;
3326         struct cifs_writedata *wdata;
3327         struct iov_iter saved_from = *from;
3328         loff_t saved_offset = offset;
3329         pid_t pid;
3330         struct TCP_Server_Info *server;
3331         struct page **pagevec;
3332         size_t start;
3333         unsigned int xid;
3334
3335         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3336                 pid = open_file->pid;
3337         else
3338                 pid = current->tgid;
3339
3340         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3341         xid = get_xid();
3342
3343         do {
3344                 unsigned int wsize;
3345                 struct cifs_credits credits_on_stack;
3346                 struct cifs_credits *credits = &credits_on_stack;
3347
3348                 if (open_file->invalidHandle) {
3349                         rc = cifs_reopen_file(open_file, false);
3350                         if (rc == -EAGAIN)
3351                                 continue;
3352                         else if (rc)
3353                                 break;
3354                 }
3355
3356                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3357                                                    &wsize, credits);
3358                 if (rc)
3359                         break;
3360
3361                 cur_len = min_t(const size_t, len, wsize);
3362
3363                 if (ctx->direct_io) {
3364                         ssize_t result;
3365
3366                         result = iov_iter_get_pages_alloc2(
3367                                 from, &pagevec, cur_len, &start);
3368                         if (result < 0) {
3369                                 cifs_dbg(VFS,
3370                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3371                                          result, iov_iter_type(from),
3372                                          from->iov_offset, from->count);
3373                                 dump_stack();
3374
3375                                 rc = result;
3376                                 add_credits_and_wake_if(server, credits, 0);
3377                                 break;
3378                         }
3379                         cur_len = (size_t)result;
3380
3381                         nr_pages =
3382                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3383
3384                         wdata = cifs_writedata_direct_alloc(pagevec,
3385                                              cifs_uncached_writev_complete);
3386                         if (!wdata) {
3387                                 rc = -ENOMEM;
3388                                 for (i = 0; i < nr_pages; i++)
3389                                         put_page(pagevec[i]);
3390                                 kvfree(pagevec);
3391                                 add_credits_and_wake_if(server, credits, 0);
3392                                 break;
3393                         }
3394
3395
3396                         wdata->page_offset = start;
3397                         wdata->tailsz =
3398                                 nr_pages > 1 ?
3399                                         cur_len - (PAGE_SIZE - start) -
3400                                         (nr_pages - 2) * PAGE_SIZE :
3401                                         cur_len;
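                        /*
                         * Worked example (illustrative, 4K pages): start = 1000
                         * and cur_len = 10000 span nr_pages = 3, so tailsz =
                         * 10000 - (4096 - 1000) - 1 * 4096 = 2808 bytes in the
                         * last page.
                         */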
3402                 } else {
3403                         nr_pages = get_numpages(wsize, len, &cur_len);
3404                         wdata = cifs_writedata_alloc(nr_pages,
3405                                              cifs_uncached_writev_complete);
3406                         if (!wdata) {
3407                                 rc = -ENOMEM;
3408                                 add_credits_and_wake_if(server, credits, 0);
3409                                 break;
3410                         }
3411
3412                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3413                         if (rc) {
3414                                 kvfree(wdata->pages);
3415                                 kfree(wdata);
3416                                 add_credits_and_wake_if(server, credits, 0);
3417                                 break;
3418                         }
3419
3420                         num_pages = nr_pages;
3421                         rc = wdata_fill_from_iovec(
3422                                 wdata, from, &cur_len, &num_pages);
3423                         if (rc) {
3424                                 for (i = 0; i < nr_pages; i++)
3425                                         put_page(wdata->pages[i]);
3426                                 kvfree(wdata->pages);
3427                                 kfree(wdata);
3428                                 add_credits_and_wake_if(server, credits, 0);
3429                                 break;
3430                         }
3431
3432                         /*
3433                          * Bring nr_pages down to the number of pages we
3434                          * actually used, and free any pages that we didn't use.
3435                          */
3436                         for ( ; nr_pages > num_pages; nr_pages--)
3437                                 put_page(wdata->pages[nr_pages - 1]);
3438
3439                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
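                        /* e.g. (illustrative) cur_len = 10000 over 3 pages -> tailsz = 10000 - 2 * 4096 = 1808 */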
3440                 }
3441
3442                 wdata->sync_mode = WB_SYNC_ALL;
3443                 wdata->nr_pages = nr_pages;
3444                 wdata->offset = (__u64)offset;
3445                 wdata->cfile = cifsFileInfo_get(open_file);
3446                 wdata->server = server;
3447                 wdata->pid = pid;
3448                 wdata->bytes = cur_len;
3449                 wdata->pagesz = PAGE_SIZE;
3450                 wdata->credits = credits_on_stack;
3451                 wdata->ctx = ctx;
3452                 kref_get(&ctx->refcount);
3453
3454                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3455
3456                 if (!rc) {
3457                         if (wdata->cfile->invalidHandle)
3458                                 rc = -EAGAIN;
3459                         else
3460                                 rc = server->ops->async_writev(wdata,
3461                                         cifs_uncached_writedata_release);
3462                 }
3463
3464                 if (rc) {
3465                         add_credits_and_wake_if(server, &wdata->credits, 0);
3466                         kref_put(&wdata->refcount,
3467                                  cifs_uncached_writedata_release);
3468                         if (rc == -EAGAIN) {
3469                                 *from = saved_from;
3470                                 iov_iter_advance(from, offset - saved_offset);
3471                                 continue;
3472                         }
3473                         break;
3474                 }
3475
3476                 list_add_tail(&wdata->list, wdata_list);
3477                 offset += cur_len;
3478                 len -= cur_len;
3479         } while (len > 0);
3480
3481         free_xid(xid);
3482         return rc;
3483 }
3484
3485 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3486 {
3487         struct cifs_writedata *wdata, *tmp;
3488         struct cifs_tcon *tcon;
3489         struct cifs_sb_info *cifs_sb;
3490         struct dentry *dentry = ctx->cfile->dentry;
3491         ssize_t rc;
3492
3493         tcon = tlink_tcon(ctx->cfile->tlink);
3494         cifs_sb = CIFS_SB(dentry->d_sb);
3495
3496         mutex_lock(&ctx->aio_mutex);
3497
3498         if (list_empty(&ctx->list)) {
3499                 mutex_unlock(&ctx->aio_mutex);
3500                 return;
3501         }
3502
3503         rc = ctx->rc;
3504         /*
3505          * Wait for and collect replies for any successful sends in order of
3506          * increasing offset. Once an error is hit, then return without waiting
3507          * for any more replies.
3508          */
3509 restart_loop:
3510         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3511                 if (!rc) {
3512                         if (!try_wait_for_completion(&wdata->done)) {
3513                                 mutex_unlock(&ctx->aio_mutex);
3514                                 return;
3515                         }
3516
3517                         if (wdata->result)
3518                                 rc = wdata->result;
3519                         else
3520                                 ctx->total_len += wdata->bytes;
3521
3522                         /* resend call if it's a retryable error */
3523                         if (rc == -EAGAIN) {
3524                                 struct list_head tmp_list;
3525                                 struct iov_iter tmp_from = ctx->iter;
3526
3527                                 INIT_LIST_HEAD(&tmp_list);
3528                                 list_del_init(&wdata->list);
3529
3530                                 if (ctx->direct_io)
3531                                         rc = cifs_resend_wdata(
3532                                                 wdata, &tmp_list, ctx);
3533                                 else {
3534                                         iov_iter_advance(&tmp_from,
3535                                                  wdata->offset - ctx->pos);
3536
3537                                         rc = cifs_write_from_iter(wdata->offset,
3538                                                 wdata->bytes, &tmp_from,
3539                                                 ctx->cfile, cifs_sb, &tmp_list,
3540                                                 ctx);
3541
3542                                         kref_put(&wdata->refcount,
3543                                                 cifs_uncached_writedata_release);
3544                                 }
3545
3546                                 list_splice(&tmp_list, &ctx->list);
3547                                 goto restart_loop;
3548                         }
3549                 }
3550                 list_del_init(&wdata->list);
3551                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3552         }
3553
3554         cifs_stats_bytes_written(tcon, ctx->total_len);
3555         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3556
3557         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3558
3559         mutex_unlock(&ctx->aio_mutex);
3560
3561         if (ctx->iocb && ctx->iocb->ki_complete)
3562                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3563         else
3564                 complete(&ctx->done);
3565 }
3566
3567 static ssize_t __cifs_writev(
3568         struct kiocb *iocb, struct iov_iter *from, bool direct)
3569 {
3570         struct file *file = iocb->ki_filp;
3571         ssize_t total_written = 0;
3572         struct cifsFileInfo *cfile;
3573         struct cifs_tcon *tcon;
3574         struct cifs_sb_info *cifs_sb;
3575         struct cifs_aio_ctx *ctx;
3576         struct iov_iter saved_from = *from;
3577         size_t len = iov_iter_count(from);
3578         int rc;
3579
3580         /*
3581          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC,
3582          * so in that case fall back to the non-direct write path.
3583          * This could be improved by getting pages directly from an ITER_KVEC.
3584          */
3585         if (direct && iov_iter_is_kvec(from)) {
3586                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3587                 direct = false;
3588         }
3589
3590         rc = generic_write_checks(iocb, from);
3591         if (rc <= 0)
3592                 return rc;
3593
3594         cifs_sb = CIFS_FILE_SB(file);
3595         cfile = file->private_data;
3596         tcon = tlink_tcon(cfile->tlink);
3597
3598         if (!tcon->ses->server->ops->async_writev)
3599                 return -ENOSYS;
3600
3601         ctx = cifs_aio_ctx_alloc();
3602         if (!ctx)
3603                 return -ENOMEM;
3604
3605         ctx->cfile = cifsFileInfo_get(cfile);
3606
3607         if (!is_sync_kiocb(iocb))
3608                 ctx->iocb = iocb;
3609
3610         ctx->pos = iocb->ki_pos;
3611
3612         if (direct) {
3613                 ctx->direct_io = true;
3614                 ctx->iter = *from;
3615                 ctx->len = len;
3616         } else {
3617                 rc = setup_aio_ctx_iter(ctx, from, ITER_SOURCE);
3618                 if (rc) {
3619                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3620                         return rc;
3621                 }
3622         }
3623
3624         /* grab a lock here because the write response handlers can access ctx */
3625         mutex_lock(&ctx->aio_mutex);
3626
3627         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3628                                   cfile, cifs_sb, &ctx->list, ctx);
3629
3630         /*
3631          * If at least one write was successfully sent, then discard any rc
3632          * value from the later failed sends. The writes already in flight
3633          * will either succeed, in which case we return what was written,
3634          * or fail and supply a new rc value when they are collected.
3635          */
3636         if (!list_empty(&ctx->list))
3637                 rc = 0;
3638
3639         mutex_unlock(&ctx->aio_mutex);
3640
3641         if (rc) {
3642                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3643                 return rc;
3644         }
3645
3646         if (!is_sync_kiocb(iocb)) {
3647                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3648                 return -EIOCBQUEUED;
3649         }
3650
3651         rc = wait_for_completion_killable(&ctx->done);
3652         if (rc) {
3653                 mutex_lock(&ctx->aio_mutex);
3654                 ctx->rc = rc = -EINTR;
3655                 total_written = ctx->total_len;
3656                 mutex_unlock(&ctx->aio_mutex);
3657         } else {
3658                 rc = ctx->rc;
3659                 total_written = ctx->total_len;
3660         }
3661
3662         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3663
3664         if (unlikely(!total_written))
3665                 return rc;
3666
3667         iocb->ki_pos += total_written;
3668         return total_written;
3669 }
3670
3671 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3672 {
3673         struct file *file = iocb->ki_filp;
3674
3675         cifs_revalidate_mapping(file->f_inode);
3676         return __cifs_writev(iocb, from, true);
3677 }
3678
3679 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3680 {
3681         return __cifs_writev(iocb, from, false);
3682 }
3683
3684 static ssize_t
3685 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3686 {
3687         struct file *file = iocb->ki_filp;
3688         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3689         struct inode *inode = file->f_mapping->host;
3690         struct cifsInodeInfo *cinode = CIFS_I(inode);
3691         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3692         ssize_t rc;
3693
3694         inode_lock(inode);
3695         /*
3696          * We need to hold the sem to be sure nobody modifies the lock
3697          * list with a brlock that prevents writing.
3698          */
3699         down_read(&cinode->lock_sem);
3700
3701         rc = generic_write_checks(iocb, from);
3702         if (rc <= 0)
3703                 goto out;
3704
3705         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3706                                      server->vals->exclusive_lock_type, 0,
3707                                      NULL, CIFS_WRITE_OP))
3708                 rc = __generic_file_write_iter(iocb, from);
3709         else
3710                 rc = -EACCES;
3711 out:
3712         up_read(&cinode->lock_sem);
3713         inode_unlock(inode);
3714
3715         if (rc > 0)
3716                 rc = generic_write_sync(iocb, rc);
3717         return rc;
3718 }
3719
3720 ssize_t
3721 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3722 {
3723         struct inode *inode = file_inode(iocb->ki_filp);
3724         struct cifsInodeInfo *cinode = CIFS_I(inode);
3725         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3726         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3727                                                 iocb->ki_filp->private_data;
3728         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3729         ssize_t written;
3730
3731         written = cifs_get_writer(cinode);
3732         if (written)
3733                 return written;
3734
3735         if (CIFS_CACHE_WRITE(cinode)) {
3736                 if (cap_unix(tcon->ses) &&
3737                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3738                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3739                         written = generic_file_write_iter(iocb, from);
3740                         goto out;
3741                 }
3742                 written = cifs_writev(iocb, from);
3743                 goto out;
3744         }
3745         /*
3746          * For non-oplocked files in strict cache mode we need to write the data
3747          * to the server exactly from the pos to pos+len-1 rather than flush all
3748          * affected pages because it may cause an error with mandatory locks on
3749          * these pages but not on the region from pos to pos+len-1.
3750          */
3751         written = cifs_user_writev(iocb, from);
3752         if (CIFS_CACHE_READ(cinode)) {
3753                 /*
3754                  * We have read level caching and we have just sent a write
3755                  * request to the server thus making data in the cache stale.
3756                  * Zap the cache and set oplock/lease level to NONE to avoid
3757                  * reading stale data from the cache. All subsequent read
3758                  * operations will read new data from the server.
3759                  */
3760                 cifs_zap_mapping(inode);
3761                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3762                          inode);
3763                 cinode->oplock = 0;
3764         }
3765 out:
3766         cifs_put_writer(cinode);
3767         return written;
3768 }
3769
3770 static struct cifs_readdata *
3771 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3772 {
3773         struct cifs_readdata *rdata;
3774
3775         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3776         if (rdata != NULL) {
3777                 rdata->pages = pages;
3778                 kref_init(&rdata->refcount);
3779                 INIT_LIST_HEAD(&rdata->list);
3780                 init_completion(&rdata->done);
3781                 INIT_WORK(&rdata->work, complete);
3782         }
3783
3784         return rdata;
3785 }
3786
3787 static struct cifs_readdata *
3788 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3789 {
3790         struct page **pages =
3791                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3792         struct cifs_readdata *ret = NULL;
3793
3794         if (pages) {
3795                 ret = cifs_readdata_direct_alloc(pages, complete);
3796                 if (!ret)
3797                         kfree(pages);
3798         }
3799
3800         return ret;
3801 }
3802
3803 void
3804 cifs_readdata_release(struct kref *refcount)
3805 {
3806         struct cifs_readdata *rdata = container_of(refcount,
3807                                         struct cifs_readdata, refcount);
3808 #ifdef CONFIG_CIFS_SMB_DIRECT
3809         if (rdata->mr) {
3810                 smbd_deregister_mr(rdata->mr);
3811                 rdata->mr = NULL;
3812         }
3813 #endif
3814         if (rdata->cfile)
3815                 cifsFileInfo_put(rdata->cfile);
3816
3817         kvfree(rdata->pages);
3818         kfree(rdata);
3819 }
3820
3821 static int
3822 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3823 {
3824         int rc = 0;
3825         struct page *page;
3826         unsigned int i;
3827
3828         for (i = 0; i < nr_pages; i++) {
3829                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3830                 if (!page) {
3831                         rc = -ENOMEM;
3832                         break;
3833                 }
3834                 rdata->pages[i] = page;
3835         }
3836
3837         if (rc) {
3838                 unsigned int nr_page_failed = i;
3839
3840                 for (i = 0; i < nr_page_failed; i++) {
3841                         put_page(rdata->pages[i]);
3842                         rdata->pages[i] = NULL;
3843                 }
3844         }
3845         return rc;
3846 }
3847
3848 static void
3849 cifs_uncached_readdata_release(struct kref *refcount)
3850 {
3851         struct cifs_readdata *rdata = container_of(refcount,
3852                                         struct cifs_readdata, refcount);
3853         unsigned int i;
3854
3855         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3856         for (i = 0; i < rdata->nr_pages; i++) {
3857                 put_page(rdata->pages[i]);
3858         }
3859         cifs_readdata_release(refcount);
3860 }
3861
3862 /**
3863  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3864  * @rdata:      the readdata response with list of pages holding data
3865  * @iter:       destination for our data
3866  *
3867  * This function copies data from a list of pages in a readdata response into
3868  * an array of iovecs. It will first calculate where the data should go
3869  * based on the info in the readdata and then copy the data into that spot.
3870  */
3871 static int
3872 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3873 {
3874         size_t remaining = rdata->got_bytes;
3875         unsigned int i;
3876
3877         for (i = 0; i < rdata->nr_pages; i++) {
3878                 struct page *page = rdata->pages[i];
3879                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3880                 size_t written;
3881
3882                 if (unlikely(iov_iter_is_pipe(iter))) {
3883                         void *addr = kmap_atomic(page);
3884
3885                         written = copy_to_iter(addr, copy, iter);
3886                         kunmap_atomic(addr);
3887                 } else
3888                         written = copy_page_to_iter(page, 0, copy, iter);
3889                 remaining -= written;
3890                 if (written < copy && iov_iter_count(iter) > 0)
3891                         break;
3892         }
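        /*
         * Illustrative: got_bytes = 6000 over 4K pages copies 4096 bytes from
         * page 0 and 1904 from page 1; stopping short while the iter still
         * has room indicates a fault, so any remainder becomes -EFAULT below.
         */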
3893         return remaining ? -EFAULT : 0;
3894 }
3895
3896 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3897
3898 static void
3899 cifs_uncached_readv_complete(struct work_struct *work)
3900 {
3901         struct cifs_readdata *rdata = container_of(work,
3902                                                 struct cifs_readdata, work);
3903
3904         complete(&rdata->done);
3905         collect_uncached_read_data(rdata->ctx);
3906         /* the call below can possibly free the last ref to the aio ctx */
3907         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3908 }
3909
3910 static int
3911 uncached_fill_pages(struct TCP_Server_Info *server,
3912                     struct cifs_readdata *rdata, struct iov_iter *iter,
3913                     unsigned int len)
3914 {
3915         int result = 0;
3916         unsigned int i;
3917         unsigned int nr_pages = rdata->nr_pages;
3918         unsigned int page_offset = rdata->page_offset;
3919
3920         rdata->got_bytes = 0;
3921         rdata->tailsz = PAGE_SIZE;
3922         for (i = 0; i < nr_pages; i++) {
3923                 struct page *page = rdata->pages[i];
3924                 size_t n;
3925                 unsigned int segment_size = rdata->pagesz;
3926
3927                 if (i == 0)
3928                         segment_size -= page_offset;
3929                 else
3930                         page_offset = 0;
3931
3932
3933                 if (len <= 0) {
3934                         /* no need to hold page hostage */
3935                         rdata->pages[i] = NULL;
3936                         rdata->nr_pages--;
3937                         put_page(page);
3938                         continue;
3939                 }
3940
3941                 n = len;
3942                 if (len >= segment_size)
3943                         /* enough data to fill the page */
3944                         n = segment_size;
3945                 else
3946                         rdata->tailsz = len;
3947                 len -= n;
3948
3949                 if (iter)
3950                         result = copy_page_from_iter(
3951                                         page, page_offset, n, iter);
3952 #ifdef CONFIG_CIFS_SMB_DIRECT
3953                 else if (rdata->mr)
3954                         result = n;
3955 #endif
3956                 else
3957                         result = cifs_read_page_from_socket(
3958                                         server, page, page_offset, n);
3959                 if (result < 0)
3960                         break;
3961
3962                 rdata->got_bytes += result;
3963         }
3964
3965         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
3966                                                 rdata->got_bytes : result;
3967 }
3968
3969 static int
3970 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3971                               struct cifs_readdata *rdata, unsigned int len)
3972 {
3973         return uncached_fill_pages(server, rdata, NULL, len);
3974 }
3975
3976 static int
3977 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3978                               struct cifs_readdata *rdata,
3979                               struct iov_iter *iter)
3980 {
3981         return uncached_fill_pages(server, rdata, iter, iter->count);
3982 }
3983
3984 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3985                         struct list_head *rdata_list,
3986                         struct cifs_aio_ctx *ctx)
3987 {
3988         unsigned int rsize;
3989         struct cifs_credits credits;
3990         int rc;
3991         struct TCP_Server_Info *server;
3992
3993         /* XXX: should we pick a new channel here? */
3994         server = rdata->server;
3995
3996         do {
3997                 if (rdata->cfile->invalidHandle) {
3998                         rc = cifs_reopen_file(rdata->cfile, true);
3999                         if (rc == -EAGAIN)
4000                                 continue;
4001                         else if (rc)
4002                                 break;
4003                 }
4004
4005                 /*
4006                  * Wait for credits to resend this rdata.
4007                  * Note: we attempt to resend the whole rdata rather
4008                  * than in segments.
4009                  */
4010                 do {
4011                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
4012                                                 &rsize, &credits);
4013
4014                         if (rc)
4015                                 goto fail;
4016
4017                         if (rsize < rdata->bytes) {
4018                                 add_credits_and_wake_if(server, &credits, 0);
4019                                 msleep(1000);
4020                         }
4021                 } while (rsize < rdata->bytes);
4022                 rdata->credits = credits;
4023
4024                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4025                 if (!rc) {
4026                         if (rdata->cfile->invalidHandle)
4027                                 rc = -EAGAIN;
4028                         else {
4029 #ifdef CONFIG_CIFS_SMB_DIRECT
4030                                 if (rdata->mr) {
4031                                         rdata->mr->need_invalidate = true;
4032                                         smbd_deregister_mr(rdata->mr);
4033                                         rdata->mr = NULL;
4034                                 }
4035 #endif
4036                                 rc = server->ops->async_readv(rdata);
4037                         }
4038                 }
4039
4040                 /* If the read was successfully sent, we are done */
4041                 if (!rc) {
4042                         /* Add to aio pending list */
4043                         list_add_tail(&rdata->list, rdata_list);
4044                         return 0;
4045                 }
4046
4047                 /* Roll back credits and retry if needed */
4048                 add_credits_and_wake_if(server, &rdata->credits, 0);
4049         } while (rc == -EAGAIN);
4050
4051 fail:
4052         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4053         return rc;
4054 }
4055
4056 static int
4057 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
4058                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4059                      struct cifs_aio_ctx *ctx)
4060 {
4061         struct cifs_readdata *rdata;
4062         unsigned int npages, rsize;
4063         struct cifs_credits credits_on_stack;
4064         struct cifs_credits *credits = &credits_on_stack;
4065         size_t cur_len;
4066         int rc;
4067         pid_t pid;
4068         struct TCP_Server_Info *server;
4069         struct page **pagevec;
4070         size_t start;
4071         struct iov_iter direct_iov = ctx->iter;
4072
4073         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4074
4075         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4076                 pid = open_file->pid;
4077         else
4078                 pid = current->tgid;
4079
4080         if (ctx->direct_io)
4081                 iov_iter_advance(&direct_iov, offset - ctx->pos);
4082
4083         do {
4084                 if (open_file->invalidHandle) {
4085                         rc = cifs_reopen_file(open_file, true);
4086                         if (rc == -EAGAIN)
4087                                 continue;
4088                         else if (rc)
4089                                 break;
4090                 }
4091
4092                 if (cifs_sb->ctx->rsize == 0)
4093                         cifs_sb->ctx->rsize =
4094                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4095                                                              cifs_sb->ctx);
4096
4097                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4098                                                    &rsize, credits);
4099                 if (rc)
4100                         break;
4101
4102                 cur_len = min_t(const size_t, len, rsize);
4103
4104                 if (ctx->direct_io) {
4105                         ssize_t result;
4106
4107                         result = iov_iter_get_pages_alloc2(
4108                                         &direct_iov, &pagevec,
4109                                         cur_len, &start);
4110                         if (result < 0) {
4111                                 cifs_dbg(VFS,
4112                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4113                                          result, iov_iter_type(&direct_iov),
4114                                          direct_iov.iov_offset,
4115                                          direct_iov.count);
4116                                 dump_stack();
4117
4118                                 rc = result;
4119                                 add_credits_and_wake_if(server, credits, 0);
4120                                 break;
4121                         }
4122                         cur_len = (size_t)result;
4123
4124                         rdata = cifs_readdata_direct_alloc(
4125                                         pagevec, cifs_uncached_readv_complete);
4126                         if (!rdata) {
4127                                 add_credits_and_wake_if(server, credits, 0);
4128                                 rc = -ENOMEM;
4129                                 break;
4130                         }
4131
4132                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4133                         rdata->page_offset = start;
4134                         rdata->tailsz = npages > 1 ?
4135                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4136                                 cur_len;
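                        /* e.g. (illustrative) start = 512, cur_len = 8192, npages = 3 -> tailsz = 8192 - 3584 - 4096 = 512 */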
4137
4138                 } else {
4139
4140                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4141                         /* allocate a readdata struct */
4142                         rdata = cifs_readdata_alloc(npages,
4143                                             cifs_uncached_readv_complete);
4144                         if (!rdata) {
4145                                 add_credits_and_wake_if(server, credits, 0);
4146                                 rc = -ENOMEM;
4147                                 break;
4148                         }
4149
4150                         rc = cifs_read_allocate_pages(rdata, npages);
4151                         if (rc) {
4152                                 kvfree(rdata->pages);
4153                                 kfree(rdata);
4154                                 add_credits_and_wake_if(server, credits, 0);
4155                                 break;
4156                         }
4157
4158                         rdata->tailsz = PAGE_SIZE;
4159                 }
4160
4161                 rdata->server = server;
4162                 rdata->cfile = cifsFileInfo_get(open_file);
4163                 rdata->nr_pages = npages;
4164                 rdata->offset = offset;
4165                 rdata->bytes = cur_len;
4166                 rdata->pid = pid;
4167                 rdata->pagesz = PAGE_SIZE;
4168                 rdata->read_into_pages = cifs_uncached_read_into_pages;
4169                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4170                 rdata->credits = credits_on_stack;
4171                 rdata->ctx = ctx;
4172                 kref_get(&ctx->refcount);
4173
4174                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4175
4176                 if (!rc) {
4177                         if (rdata->cfile->invalidHandle)
4178                                 rc = -EAGAIN;
4179                         else
4180                                 rc = server->ops->async_readv(rdata);
4181                 }
4182
4183                 if (rc) {
4184                         add_credits_and_wake_if(server, &rdata->credits, 0);
4185                         kref_put(&rdata->refcount,
4186                                 cifs_uncached_readdata_release);
4187                         if (rc == -EAGAIN) {
4188                                 iov_iter_revert(&direct_iov, cur_len);
4189                                 continue;
4190                         }
4191                         break;
4192                 }
4193
4194                 list_add_tail(&rdata->list, rdata_list);
4195                 offset += cur_len;
4196                 len -= cur_len;
4197         } while (len > 0);
4198
4199         return rc;
4200 }
4201
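/*
 * Collect the results of the uncached read requests queued on ctx->list:
 * wait for each readdata to complete, resend any request that failed with
 * -EAGAIN (reusing the rdata for direct I/O, or issuing a fresh
 * cifs_send_async_read() otherwise), copy the completed data into the
 * caller's iterator in the non-direct case, and finally post ctx->rc and
 * complete the aio context.
 */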
4202 static void
4203 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4204 {
4205         struct cifs_readdata *rdata, *tmp;
4206         struct iov_iter *to = &ctx->iter;
4207         struct cifs_sb_info *cifs_sb;
4208         int rc;
4209
4210         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4211
4212         mutex_lock(&ctx->aio_mutex);
4213
4214         if (list_empty(&ctx->list)) {
4215                 mutex_unlock(&ctx->aio_mutex);
4216                 return;
4217         }
4218
4219         rc = ctx->rc;
4220         /* the loop below should proceed in the order of increasing offsets */
4221 again:
4222         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4223                 if (!rc) {
4224                         if (!try_wait_for_completion(&rdata->done)) {
4225                                 mutex_unlock(&ctx->aio_mutex);
4226                                 return;
4227                         }
4228
4229                         if (rdata->result == -EAGAIN) {
4230                                 /* resend call if it's a retryable error */
4231                                 struct list_head tmp_list;
4232                                 unsigned int got_bytes = rdata->got_bytes;
4233
4234                                 list_del_init(&rdata->list);
4235                                 INIT_LIST_HEAD(&tmp_list);
4236
4237                                 /*
4238                                  * We got part of the data and then a
4239                                  * reconnect happened -- fill the buffer
4240                                  * and continue reading.
4241                                  */
4242                                 if (got_bytes && got_bytes < rdata->bytes) {
4243                                         rc = 0;
4244                                         if (!ctx->direct_io)
4245                                                 rc = cifs_readdata_to_iov(rdata, to);
4246                                         if (rc) {
4247                                                 kref_put(&rdata->refcount,
4248                                                         cifs_uncached_readdata_release);
4249                                                 continue;
4250                                         }
4251                                 }
4252
4253                                 if (ctx->direct_io) {
4254                                         /*
4255                                          * Re-use rdata as this is a
4256                                          * direct I/O
4257                                          */
4258                                         rc = cifs_resend_rdata(
4259                                                 rdata,
4260                                                 &tmp_list, ctx);
4261                                 } else {
4262                                         rc = cifs_send_async_read(
4263                                                 rdata->offset + got_bytes,
4264                                                 rdata->bytes - got_bytes,
4265                                                 rdata->cfile, cifs_sb,
4266                                                 &tmp_list, ctx);
4267
4268                                         kref_put(&rdata->refcount,
4269                                                 cifs_uncached_readdata_release);
4270                                 }
4271
4272                                 list_splice(&tmp_list, &ctx->list);
4273
4274                                 goto again;
4275                         } else if (rdata->result)
4276                                 rc = rdata->result;
4277                         else if (!ctx->direct_io)
4278                                 rc = cifs_readdata_to_iov(rdata, to);
4279
4280                         /* if there was a short read -- discard anything left */
4281                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4282                                 rc = -ENODATA;
4283
4284                         ctx->total_len += rdata->got_bytes;
4285                 }
4286                 list_del_init(&rdata->list);
4287                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4288         }
4289
4290         if (!ctx->direct_io)
4291                 ctx->total_len = ctx->len - iov_iter_count(to);
4292
4293         /* mask nodata case */
4294         if (rc == -ENODATA)
4295                 rc = 0;
4296
4297         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4298
4299         mutex_unlock(&ctx->aio_mutex);
4300
4301         if (ctx->iocb && ctx->iocb->ki_complete)
4302                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4303         else
4304                 complete(&ctx->done);
4305 }
4306
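/*
 * Common implementation behind cifs_user_readv() and cifs_direct_readv():
 * split the request into rsize-sized asynchronous reads with
 * cifs_send_async_read(), then either return -EIOCBQUEUED for an async
 * kiocb or wait for collect_uncached_read_data() to finish the context.
 */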
4307 static ssize_t __cifs_readv(
4308         struct kiocb *iocb, struct iov_iter *to, bool direct)
4309 {
4310         size_t len;
4311         struct file *file = iocb->ki_filp;
4312         struct cifs_sb_info *cifs_sb;
4313         struct cifsFileInfo *cfile;
4314         struct cifs_tcon *tcon;
4315         ssize_t rc, total_read = 0;
4316         loff_t offset = iocb->ki_pos;
4317         struct cifs_aio_ctx *ctx;
4318
4319         /*
4320          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so fall
4321          * back to the data-copy read path. This could be improved by
4322          * getting the pages directly for ITER_KVEC.
4323          */
4324         if (direct && iov_iter_is_kvec(to)) {
4325                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4326                 direct = false;
4327         }
4328
4329         len = iov_iter_count(to);
4330         if (!len)
4331                 return 0;
4332
4333         cifs_sb = CIFS_FILE_SB(file);
4334         cfile = file->private_data;
4335         tcon = tlink_tcon(cfile->tlink);
4336
4337         if (!tcon->ses->server->ops->async_readv)
4338                 return -ENOSYS;
4339
4340         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4341                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4342
4343         ctx = cifs_aio_ctx_alloc();
4344         if (!ctx)
4345                 return -ENOMEM;
4346
4347         ctx->cfile = cifsFileInfo_get(cfile);
4348
4349         if (!is_sync_kiocb(iocb))
4350                 ctx->iocb = iocb;
4351
4352         if (user_backed_iter(to))
4353                 ctx->should_dirty = true;
4354
4355         if (direct) {
4356                 ctx->pos = offset;
4357                 ctx->direct_io = true;
4358                 ctx->iter = *to;
4359                 ctx->len = len;
4360         } else {
4361                 rc = setup_aio_ctx_iter(ctx, to, ITER_DEST);
4362                 if (rc) {
4363                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4364                         return rc;
4365                 }
4366                 len = ctx->len;
4367         }
4368
4369         if (direct) {
4370                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4371                                                   offset, offset + len - 1);
4372                 if (rc) {
4373                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4374                         return -EAGAIN;
4375                 }
4376         }
4377
4378         /* grab a lock here because read response handlers can access ctx */
4379         mutex_lock(&ctx->aio_mutex);
4380
4381         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4382
4383         /* if at least one read request was sent successfully, then reset rc */
4384         if (!list_empty(&ctx->list))
4385                 rc = 0;
4386
4387         mutex_unlock(&ctx->aio_mutex);
4388
4389         if (rc) {
4390                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4391                 return rc;
4392         }
4393
4394         if (!is_sync_kiocb(iocb)) {
4395                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4396                 return -EIOCBQUEUED;
4397         }
4398
4399         rc = wait_for_completion_killable(&ctx->done);
4400         if (rc) {
4401                 mutex_lock(&ctx->aio_mutex);
4402                 ctx->rc = rc = -EINTR;
4403                 total_read = ctx->total_len;
4404                 mutex_unlock(&ctx->aio_mutex);
4405         } else {
4406                 rc = ctx->rc;
4407                 total_read = ctx->total_len;
4408         }
4409
4410         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4411
4412         if (total_read) {
4413                 iocb->ki_pos += total_read;
4414                 return total_read;
4415         }
4416         return rc;
4417 }
4418
4419 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4420 {
4421         return __cifs_readv(iocb, to, true);
4422 }
4423
4424 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4425 {
4426         return __cifs_readv(iocb, to, false);
4427 }
4428
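/*
 * Read entry point for strict cache mode: fall back to uncached reads when
 * we do not hold a read oplock, use the generic cached path when POSIX
 * byte-range locking applies, and otherwise read through the page cache
 * only after checking for conflicting mandatory locks.
 */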
4429 ssize_t
4430 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4431 {
4432         struct inode *inode = file_inode(iocb->ki_filp);
4433         struct cifsInodeInfo *cinode = CIFS_I(inode);
4434         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4435         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4436                                                 iocb->ki_filp->private_data;
4437         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4438         int rc = -EACCES;
4439
4440         /*
4441          * In strict cache mode we need to read from the server every time
4442          * if we don't have a level II oplock, because the server can delay
4443          * the mtime change - so we can't decide whether to invalidate the
4444          * inode. Page reading can also fail if there are mandatory locks on
4445          * pages affected by this read but not on the region from pos to
4446          * pos+len-1.
4447          */
4448         if (!CIFS_CACHE_READ(cinode))
4449                 return cifs_user_readv(iocb, to);
4450
4451         if (cap_unix(tcon->ses) &&
4452             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4453             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4454                 return generic_file_read_iter(iocb, to);
4455
4456         /*
4457          * We need to hold the sem to be sure nobody modifies lock list
4458          * with a brlock that prevents reading.
4459          */
4460         down_read(&cinode->lock_sem);
4461         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4462                                      tcon->ses->server->vals->shared_lock_type,
4463                                      0, NULL, CIFS_READ_OP))
4464                 rc = generic_file_read_iter(iocb, to);
4465         up_read(&cinode->lock_sem);
4466         return rc;
4467 }
4468
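/*
 * Synchronous, uncached read helper: issue SMB reads of at most rsize
 * bytes at a time via server->ops->sync_read(), reopening the handle and
 * retrying on -EAGAIN, until the buffer is filled or EOF/error is hit.
 */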
4469 static ssize_t
4470 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4471 {
4472         int rc = -EACCES;
4473         unsigned int bytes_read = 0;
4474         unsigned int total_read;
4475         unsigned int current_read_size;
4476         unsigned int rsize;
4477         struct cifs_sb_info *cifs_sb;
4478         struct cifs_tcon *tcon;
4479         struct TCP_Server_Info *server;
4480         unsigned int xid;
4481         char *cur_offset;
4482         struct cifsFileInfo *open_file;
4483         struct cifs_io_parms io_parms = {0};
4484         int buf_type = CIFS_NO_BUFFER;
4485         __u32 pid;
4486
4487         xid = get_xid();
4488         cifs_sb = CIFS_FILE_SB(file);
4489
4490         /* FIXME: set up handlers for larger reads and/or convert to async */
4491         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4492
4493         if (file->private_data == NULL) {
4494                 rc = -EBADF;
4495                 free_xid(xid);
4496                 return rc;
4497         }
4498         open_file = file->private_data;
4499         tcon = tlink_tcon(open_file->tlink);
4500         server = cifs_pick_channel(tcon->ses);
4501
4502         if (!server->ops->sync_read) {
4503                 free_xid(xid);
4504                 return -ENOSYS;
4505         }
4506
4507         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4508                 pid = open_file->pid;
4509         else
4510                 pid = current->tgid;
4511
4512         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4513                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4514
4515         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4516              total_read += bytes_read, cur_offset += bytes_read) {
4517                 do {
4518                         current_read_size = min_t(uint, read_size - total_read,
4519                                                   rsize);
4520                         /*
4521                          * For Windows ME and 9x we do not want to request
4522                          * more than the server negotiated since it will
4523                          * then refuse the read.
4524                          */
4525                         if (!(tcon->ses->capabilities &
4526                                 tcon->ses->server->vals->cap_large_files)) {
4527                                 current_read_size = min_t(uint,
4528                                         current_read_size, CIFSMaxBufSize);
4529                         }
4530                         if (open_file->invalidHandle) {
4531                                 rc = cifs_reopen_file(open_file, true);
4532                                 if (rc != 0)
4533                                         break;
4534                         }
4535                         io_parms.pid = pid;
4536                         io_parms.tcon = tcon;
4537                         io_parms.offset = *offset;
4538                         io_parms.length = current_read_size;
4539                         io_parms.server = server;
4540                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4541                                                     &bytes_read, &cur_offset,
4542                                                     &buf_type);
4543                 } while (rc == -EAGAIN);
4544
4545                 if (rc || (bytes_read == 0)) {
4546                         if (total_read) {
4547                                 break;
4548                         } else {
4549                                 free_xid(xid);
4550                                 return rc;
4551                         }
4552                 } else {
4553                         cifs_stats_bytes_read(tcon, total_read);
4554                         *offset += bytes_read;
4555                 }
4556         }
4557         free_xid(xid);
4558         return total_read;
4559 }
4560
4561 /*
4562  * If the page is mmap'ed into a process' page tables, then we need to make
4563  * sure that it doesn't change while being written back.
4564  */
4565 static vm_fault_t
4566 cifs_page_mkwrite(struct vm_fault *vmf)
4567 {
4568         struct page *page = vmf->page;
4569
4570         /* Wait for the page to be written to the cache before we allow it to
4571          * be modified.  We then assume the entire page will need writing back.
4572          */
4573 #ifdef CONFIG_CIFS_FSCACHE
4574         if (PageFsCache(page) &&
4575             wait_on_page_fscache_killable(page) < 0)
4576                 return VM_FAULT_RETRY;
4577 #endif
4578
4579         wait_on_page_writeback(page);
4580
4581         if (lock_page_killable(page) < 0)
4582                 return VM_FAULT_RETRY;
4583         return VM_FAULT_LOCKED;
4584 }
4585
4586 static const struct vm_operations_struct cifs_file_vm_ops = {
4587         .fault = filemap_fault,
4588         .map_pages = filemap_map_pages,
4589         .page_mkwrite = cifs_page_mkwrite,
4590 };
4591
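/*
 * mmap for strict cache mode: if we do not hold a read oplock, zap the
 * local page cache first so that mapped reads are satisfied by the server.
 */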
4592 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4593 {
4594         int xid, rc = 0;
4595         struct inode *inode = file_inode(file);
4596
4597         xid = get_xid();
4598
4599         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4600                 rc = cifs_zap_mapping(inode);
4601         if (!rc)
4602                 rc = generic_file_mmap(file, vma);
4603         if (!rc)
4604                 vma->vm_ops = &cifs_file_vm_ops;
4605
4606         free_xid(xid);
4607         return rc;
4608 }
4609
4610 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4611 {
4612         int rc, xid;
4613
4614         xid = get_xid();
4615
4616         rc = cifs_revalidate_file(file);
4617         if (rc)
4618                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4619                          rc);
4620         if (!rc)
4621                 rc = generic_file_mmap(file, vma);
4622         if (!rc)
4623                 vma->vm_ops = &cifs_file_vm_ops;
4624
4625         free_xid(xid);
4626         return rc;
4627 }
4628
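/*
 * Work item run when an asynchronous readahead read completes: mark each
 * page uptodate (or as an error), optionally copy it into fscache, then
 * unlock and release it.
 */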
4629 static void
4630 cifs_readv_complete(struct work_struct *work)
4631 {
4632         unsigned int i, got_bytes;
4633         struct cifs_readdata *rdata = container_of(work,
4634                                                 struct cifs_readdata, work);
4635
4636         got_bytes = rdata->got_bytes;
4637         for (i = 0; i < rdata->nr_pages; i++) {
4638                 struct page *page = rdata->pages[i];
4639
4640                 if (rdata->result == 0 ||
4641                     (rdata->result == -EAGAIN && got_bytes)) {
4642                         flush_dcache_page(page);
4643                         SetPageUptodate(page);
4644                 } else
4645                         SetPageError(page);
4646
4647                 if (rdata->result == 0 ||
4648                     (rdata->result == -EAGAIN && got_bytes))
4649                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4650
4651                 unlock_page(page);
4652
4653                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4654
4655                 put_page(page);
4656                 rdata->pages[i] = NULL;
4657         }
4658         kref_put(&rdata->refcount, cifs_readdata_release);
4659 }
4660
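/*
 * Fill the pages attached to an rdata from the transport: copy from the
 * supplied iterator when one is given (for SMB Direct the data has
 * already been placed by RDMA), otherwise read from the socket. A partial
 * final page is zero-padded, and pages past the data - or beyond the EOF
 * the server probably has - are released early.
 */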
4661 static int
4662 readpages_fill_pages(struct TCP_Server_Info *server,
4663                      struct cifs_readdata *rdata, struct iov_iter *iter,
4664                      unsigned int len)
4665 {
4666         int result = 0;
4667         unsigned int i;
4668         u64 eof;
4669         pgoff_t eof_index;
4670         unsigned int nr_pages = rdata->nr_pages;
4671         unsigned int page_offset = rdata->page_offset;
4672
4673         /* determine the eof that the server (probably) has */
4674         eof = CIFS_I(rdata->mapping->host)->server_eof;
4675         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4676         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4677
4678         rdata->got_bytes = 0;
4679         rdata->tailsz = PAGE_SIZE;
4680         for (i = 0; i < nr_pages; i++) {
4681                 struct page *page = rdata->pages[i];
4682                 unsigned int to_read = rdata->pagesz;
4683                 size_t n;
4684
4685                 if (i == 0)
4686                         to_read -= page_offset;
4687                 else
4688                         page_offset = 0;
4689
4690                 n = to_read;
4691
4692                 if (len >= to_read) {
4693                         len -= to_read;
4694                 } else if (len > 0) {
4695                         /* enough for partial page, fill and zero the rest */
4696                         zero_user(page, len + page_offset, to_read - len);
4697                         n = rdata->tailsz = len;
4698                         len = 0;
4699                 } else if (page->index > eof_index) {
4700                         /*
4701                          * The VFS will not try to do readahead past the
4702                          * i_size, but it's possible that we have outstanding
4703                          * writes with gaps in the middle and the i_size hasn't
4704                          * caught up yet. Populate those with zeroed out pages
4705                          * to prevent the VFS from repeatedly attempting to
4706                          * fill them until the writes are flushed.
4707                          */
4708                         zero_user(page, 0, PAGE_SIZE);
4709                         flush_dcache_page(page);
4710                         SetPageUptodate(page);
4711                         unlock_page(page);
4712                         put_page(page);
4713                         rdata->pages[i] = NULL;
4714                         rdata->nr_pages--;
4715                         continue;
4716                 } else {
4717                         /* no need to hold page hostage */
4718                         unlock_page(page);
4719                         put_page(page);
4720                         rdata->pages[i] = NULL;
4721                         rdata->nr_pages--;
4722                         continue;
4723                 }
4724
4725                 if (iter)
4726                         result = copy_page_from_iter(
4727                                         page, page_offset, n, iter);
4728 #ifdef CONFIG_CIFS_SMB_DIRECT
4729                 else if (rdata->mr)
4730                         result = n;
4731 #endif
4732                 else
4733                         result = cifs_read_page_from_socket(
4734                                         server, page, page_offset, n);
4735                 if (result < 0)
4736                         break;
4737
4738                 rdata->got_bytes += result;
4739         }
4740
4741         return result != -ECONNABORTED && rdata->got_bytes > 0 ?
4742                                                 rdata->got_bytes : result;
4743 }
4744
4745 static int
4746 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4747                                struct cifs_readdata *rdata, unsigned int len)
4748 {
4749         return readpages_fill_pages(server, rdata, NULL, len);
4750 }
4751
4752 static int
4753 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4754                                struct cifs_readdata *rdata,
4755                                struct iov_iter *iter)
4756 {
4757         return readpages_fill_pages(server, rdata, iter, iter->count);
4758 }
4759
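/*
 * ->readahead() implementation: consume any ranges that are already
 * present in fscache, then chop the rest of the request into rsize-sized
 * asynchronous SMB reads, obtaining credits for each before dispatch.
 */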
4760 static void cifs_readahead(struct readahead_control *ractl)
4761 {
4762         int rc;
4763         struct cifsFileInfo *open_file = ractl->file->private_data;
4764         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4765         struct TCP_Server_Info *server;
4766         pid_t pid;
4767         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4768         pgoff_t next_cached = ULONG_MAX;
4769         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4770                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4771         bool check_cache = caching;
4772
4773         xid = get_xid();
4774
4775         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4776                 pid = open_file->pid;
4777         else
4778                 pid = current->tgid;
4779
4780         rc = 0;
4781         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4782
4783         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4784                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4785
4786         /*
4787          * Chop the readahead request up into rsize-sized read requests.
4788          */
4789         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4790                 unsigned int i, got, rsize;
4791                 struct page *page;
4792                 struct cifs_readdata *rdata;
4793                 struct cifs_credits credits_on_stack;
4794                 struct cifs_credits *credits = &credits_on_stack;
4795                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4796
4797                 /*
4798                  * Find out if we have anything cached in the range of
4799                  * interest, and if so, where the next chunk of cached data is.
4800                  */
4801                 if (caching) {
4802                         if (check_cache) {
4803                                 rc = cifs_fscache_query_occupancy(
4804                                         ractl->mapping->host, index, nr_pages,
4805                                         &next_cached, &cache_nr_pages);
4806                                 if (rc < 0)
4807                                         caching = false;
4808                                 check_cache = false;
4809                         }
4810
4811                         if (index == next_cached) {
4812                                 /*
4813                                  * TODO: Send a whole batch of pages to be read
4814                                  * by the cache.
4815                                  */
4816                                 struct folio *folio = readahead_folio(ractl);
4817
4818                                 last_batch_size = folio_nr_pages(folio);
4819                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4820                                                                &folio->page) < 0) {
4821                                         /*
4822                                          * TODO: Deal with cache read failure
4823                                          * here, but for the moment, delegate
4824                                          * that to readpage.
4825                                          */
4826                                         caching = false;
4827                                 }
4828                                 folio_unlock(folio);
4829                                 next_cached++;
4830                                 cache_nr_pages--;
4831                                 if (cache_nr_pages == 0)
4832                                         check_cache = true;
4833                                 continue;
4834                         }
4835                 }
4836
4837                 if (open_file->invalidHandle) {
4838                         rc = cifs_reopen_file(open_file, true);
4839                         if (rc) {
4840                                 if (rc == -EAGAIN)
4841                                         continue;
4842                                 break;
4843                         }
4844                 }
4845
4846                 if (cifs_sb->ctx->rsize == 0)
4847                         cifs_sb->ctx->rsize =
4848                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4849                                                              cifs_sb->ctx);
4850
4851                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4852                                                    &rsize, credits);
4853                 if (rc)
4854                         break;
4855                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4856                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4857
4858                 /*
4859                  * Give up immediately if rsize is too small to read an entire
4860                  * page. The VFS will fall back to readpage. We should never
4861                  * reach this point, however, since we set ra_pages to 0 when the
4862                  * rsize is smaller than a cache page.
4863                  */
4864                 if (unlikely(!nr_pages)) {
4865                         add_credits_and_wake_if(server, credits, 0);
4866                         break;
4867                 }
4868
4869                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4870                 if (!rdata) {
4871                         /* best to give up if we're out of mem */
4872                         add_credits_and_wake_if(server, credits, 0);
4873                         break;
4874                 }
4875
4876                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4877                 if (got != nr_pages) {
4878                         pr_warn("__readahead_batch() returned %u/%u\n",
4879                                 got, nr_pages);
4880                         nr_pages = got;
4881                 }
4882
4883                 rdata->nr_pages = nr_pages;
4884                 rdata->bytes    = readahead_batch_length(ractl);
4885                 rdata->cfile    = cifsFileInfo_get(open_file);
4886                 rdata->server   = server;
4887                 rdata->mapping  = ractl->mapping;
4888                 rdata->offset   = readahead_pos(ractl);
4889                 rdata->pid      = pid;
4890                 rdata->pagesz   = PAGE_SIZE;
4891                 rdata->tailsz   = PAGE_SIZE;
4892                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4893                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4894                 rdata->credits  = credits_on_stack;
4895
4896                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4897                 if (!rc) {
4898                         if (rdata->cfile->invalidHandle)
4899                                 rc = -EAGAIN;
4900                         else
4901                                 rc = server->ops->async_readv(rdata);
4902                 }
4903
4904                 if (rc) {
4905                         add_credits_and_wake_if(server, &rdata->credits, 0);
4906                         for (i = 0; i < rdata->nr_pages; i++) {
4907                                 page = rdata->pages[i];
4908                                 unlock_page(page);
4909                                 put_page(page);
4910                         }
4911                         /* Fall back to readpage in error/reconnect cases */
4912                         kref_put(&rdata->refcount, cifs_readdata_release);
4913                         break;
4914                 }
4915
4916                 kref_put(&rdata->refcount, cifs_readdata_release);
4917                 last_batch_size = nr_pages;
4918         }
4919
4920         free_xid(xid);
4921 }
4922
4923 /*
4924  * cifs_readpage_worker must be called with the page pinned
4925  */
4926 static int cifs_readpage_worker(struct file *file, struct page *page,
4927         loff_t *poffset)
4928 {
4929         char *read_data;
4930         int rc;
4931
4932         /* Is the page cached? */
4933         rc = cifs_readpage_from_fscache(file_inode(file), page);
4934         if (rc == 0)
4935                 goto read_complete;
4936
4937         read_data = kmap(page);
4938         /* for reads over a certain size we could initiate async readahead */
4939
4940         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4941
4942         if (rc < 0)
4943                 goto io_error;
4944         else
4945                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4946
4947         /* we do not want atime to be less than mtime, it broke some apps */
4948         file_inode(file)->i_atime = current_time(file_inode(file));
4949         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4950                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4951         else
4952                 file_inode(file)->i_atime = current_time(file_inode(file));
4953
4954         if (PAGE_SIZE > rc)
4955                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4956
4957         flush_dcache_page(page);
4958         SetPageUptodate(page);
4959
4960         /* send this page to the cache */
4961         cifs_readpage_to_fscache(file_inode(file), page);
4962
4963         rc = 0;
4964
4965 io_error:
4966         kunmap(page);
4967
4968 read_complete:
4969         unlock_page(page);
4970         return rc;
4971 }
4972
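/* ->read_folio(): synchronously fill a single page, preferring fscache. */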
4973 static int cifs_read_folio(struct file *file, struct folio *folio)
4974 {
4975         struct page *page = &folio->page;
4976         loff_t offset = page_file_offset(page);
4977         int rc = -EACCES;
4978         unsigned int xid;
4979
4980         xid = get_xid();
4981
4982         if (file->private_data == NULL) {
4983                 rc = -EBADF;
4984                 free_xid(xid);
4985                 return rc;
4986         }
4987
4988         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4989                  page, (int)offset, (int)offset);
4990
4991         rc = cifs_readpage_worker(file, page, &offset);
4992
4993         free_xid(xid);
4994         return rc;
4995 }
4996
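/* Return 1 if any open file on the inode was opened with write access. */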
4997 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4998 {
4999         struct cifsFileInfo *open_file;
5000
5001         spin_lock(&cifs_inode->open_file_lock);
5002         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
5003                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
5004                         spin_unlock(&cifs_inode->open_file_lock);
5005                         return 1;
5006                 }
5007         }
5008         spin_unlock(&cifs_inode->open_file_lock);
5009         return 0;
5010 }
5011
5012 /* We do not want to update the file size from the server for inodes
5013    open for write, to avoid races with writepage extending the file.
5014    In the future we could consider allowing the inode to be refreshed
5015    only on increases in the file size, but this is tricky to do without
5016    racing with writebehind page caching in the current Linux kernel
5017    design. */
5018 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
5019 {
5020         if (!cifsInode)
5021                 return true;
5022
5023         if (is_inode_writable(cifsInode)) {
5024                 /* This inode is open for write at least once */
5025                 struct cifs_sb_info *cifs_sb;
5026
5027                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
5028                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
5029                         /* since there is no page cache to corrupt on
5030                            directio we can change the size safely */
5031                         return true;
5032                 }
5033
5034                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
5035                         return true;
5036
5037                 return false;
5038         } else
5039                 return true;
5040 }
5041
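/*
 * ->write_begin(): grab the page cache page for the write and, unless the
 * write covers the whole page or an oplock lets us skip data that will be
 * overwritten anyway, read the existing contents in first.
 */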
5042 static int cifs_write_begin(struct file *file, struct address_space *mapping,
5043                         loff_t pos, unsigned len,
5044                         struct page **pagep, void **fsdata)
5045 {
5046         int oncethru = 0;
5047         pgoff_t index = pos >> PAGE_SHIFT;
5048         loff_t offset = pos & (PAGE_SIZE - 1);
5049         loff_t page_start = pos & PAGE_MASK;
5050         loff_t i_size;
5051         struct page *page;
5052         int rc = 0;
5053
5054         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
5055
5056 start:
5057         page = grab_cache_page_write_begin(mapping, index);
5058         if (!page) {
5059                 rc = -ENOMEM;
5060                 goto out;
5061         }
5062
5063         if (PageUptodate(page))
5064                 goto out;
5065
5066         /*
5067          * If we write a full page it will be up to date, no need to read from
5068          * the server. If the write is short, we'll end up doing a sync write
5069          * instead.
5070          */
5071         if (len == PAGE_SIZE)
5072                 goto out;
5073
5074         /*
5075          * optimize away the read when we have an oplock, and we're not
5076          * expecting to use any of the data we'd be reading in. That
5077          * is, when the page lies beyond the EOF, or straddles the EOF
5078          * and the write will cover all of the existing data.
5079          */
5080         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
5081                 i_size = i_size_read(mapping->host);
5082                 if (page_start >= i_size ||
5083                     (offset == 0 && (pos + len) >= i_size)) {
5084                         zero_user_segments(page, 0, offset,
5085                                            offset + len,
5086                                            PAGE_SIZE);
5087                         /*
5088                          * PageChecked means that the parts of the page
5089                          * to which we're not writing are considered up
5090                          * to date. Once the data is copied to the
5091                          * page, it can be set uptodate.
5092                          */
5093                         SetPageChecked(page);
5094                         goto out;
5095                 }
5096         }
5097
5098         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
5099                 /*
5100                  * might as well read a page, it is fast enough. If we get
5101                  * an error, we don't need to return it. cifs_write_end will
5102                  * do a sync write instead since PG_uptodate isn't set.
5103                  */
5104                 cifs_readpage_worker(file, page, &page_start);
5105                 put_page(page);
5106                 oncethru = 1;
5107                 goto start;
5108         } else {
5109                 /* we could try using another file handle if there is one -
5110                    but how would we lock it to prevent a close of that handle
5111                    racing with this read? In any case this will be written
5112                    out by write_end, so it is fine */
5113         }
5114 out:
5115         *pagep = page;
5116         return rc;
5117 }
5118
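/*
 * Allow the VM to reclaim a folio: refuse if it carries private data, and
 * wait for (or defer on) any fscache write that is still in flight.
 */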
5119 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
5120 {
5121         if (folio_test_private(folio))
5122                 return false;
5123         if (folio_test_fscache(folio)) {
5124                 if (current_is_kswapd() || !(gfp & __GFP_FS))
5125                         return false;
5126                 folio_wait_fscache(folio);
5127         }
5128         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
5129         return true;
5130 }
5131
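/* Wait for any pending fscache write before the folio is invalidated. */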
5132 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
5133                                  size_t length)
5134 {
5135         folio_wait_fscache(folio);
5136 }
5137
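/*
 * Write a dirty folio back synchronously (WB_SYNC_ALL) before it is
 * invalidated, then wait for any fscache write on it to finish.
 */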
5138 static int cifs_launder_folio(struct folio *folio)
5139 {
5140         int rc = 0;
5141         loff_t range_start = folio_pos(folio);
5142         loff_t range_end = range_start + folio_size(folio);
5143         struct writeback_control wbc = {
5144                 .sync_mode = WB_SYNC_ALL,
5145                 .nr_to_write = 0,
5146                 .range_start = range_start,
5147                 .range_end = range_end,
5148         };
5149
5150         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5151
5152         if (folio_clear_dirty_for_io(folio))
5153                 rc = cifs_writepage_locked(&folio->page, &wbc);
5154
5155         folio_wait_fscache(folio);
5156         return rc;
5157 }
5158
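/*
 * Work item that services a server oplock/lease break: downgrade the
 * cached oplock state, flush (and possibly zap) the page cache, push any
 * cached byte-range locks to the server, and acknowledge the break unless
 * the handle has already been closed.
 */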
5159 void cifs_oplock_break(struct work_struct *work)
5160 {
5161         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5162                                                   oplock_break);
5163         struct inode *inode = d_inode(cfile->dentry);
5164         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5165         struct cifsInodeInfo *cinode = CIFS_I(inode);
5166         struct cifs_tcon *tcon;
5167         struct TCP_Server_Info *server;
5168         struct tcon_link *tlink;
5169         int rc = 0;
5170         bool purge_cache = false, oplock_break_cancelled;
5171         __u64 persistent_fid, volatile_fid;
5172         __u16 net_fid;
5173
5174         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5175                         TASK_UNINTERRUPTIBLE);
5176
5177         tlink = cifs_sb_tlink(cifs_sb);
5178         if (IS_ERR(tlink))
5179                 goto out;
5180         tcon = tlink_tcon(tlink);
5181         server = tcon->ses->server;
5182
5183         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5184                                       cfile->oplock_epoch, &purge_cache);
5185
5186         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5187                                                 cifs_has_mand_locks(cinode)) {
5188                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5189                          inode);
5190                 cinode->oplock = 0;
5191         }
5192
5193         if (inode && S_ISREG(inode->i_mode)) {
5194                 if (CIFS_CACHE_READ(cinode))
5195                         break_lease(inode, O_RDONLY);
5196                 else
5197                         break_lease(inode, O_WRONLY);
5198                 rc = filemap_fdatawrite(inode->i_mapping);
5199                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5200                         rc = filemap_fdatawait(inode->i_mapping);
5201                         mapping_set_error(inode->i_mapping, rc);
5202                         cifs_zap_mapping(inode);
5203                 }
5204                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5205                 if (CIFS_CACHE_WRITE(cinode))
5206                         goto oplock_break_ack;
5207         }
5208
5209         rc = cifs_push_locks(cfile);
5210         if (rc)
5211                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5212
5213 oplock_break_ack:
5214         /*
5215          * When an oplock break is received and there are no active file
5216          * handles, only cached ones, schedule the deferred close immediately
5217          * so that a new open will not use a cached handle.
5218          */
5219
5220         if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5221                 cifs_close_deferred_file(cinode);
5222
5223         persistent_fid = cfile->fid.persistent_fid;
5224         volatile_fid = cfile->fid.volatile_fid;
5225         net_fid = cfile->fid.netfid;
5226         oplock_break_cancelled = cfile->oplock_break_cancelled;
5227
5228         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
5229         /*
5230          * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5231          * an acknowledgment to be sent when the file has already been closed.
5232          */
5233         spin_lock(&cinode->open_file_lock);
5234         /* check for an empty list since this can race with kill_sb calling tree disconnect */
5235         if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5236                 spin_unlock(&cinode->open_file_lock);
5237                 rc = server->ops->oplock_response(tcon, persistent_fid,
5238                                                   volatile_fid, net_fid, cinode);
5239                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5240         } else
5241                 spin_unlock(&cinode->open_file_lock);
5242
5243         cifs_put_tlink(tlink);
5244 out:
5245         cifs_done_oplock_break(cinode);
5246 }
5247
5248 /*
5249  * The presence of cifs_direct_io() in the address space ops vector
5250  * allows open() with the O_DIRECT flag, which would have failed otherwise.
5251  *
5252  * In the non-cached mode (mount with cache=none), we shunt off direct
5253  * read and write requests, so this method should never be called.
5254  *
5255  * Direct IO is not yet supported in the cached mode.
5256  */
5257 static ssize_t
5258 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5259 {
5260         /*
5261          * FIXME
5262          * Eventually need to support direct IO for non forcedirectio mounts
5263          */
5264         return -EINVAL;
5265 }
5266
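/*
 * Activate a swapfile over SMB3: reject files with holes, size the swap
 * extent, and flag the open file so other paths know it backs swap.
 */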
5267 static int cifs_swap_activate(struct swap_info_struct *sis,
5268                               struct file *swap_file, sector_t *span)
5269 {
5270         struct cifsFileInfo *cfile = swap_file->private_data;
5271         struct inode *inode = swap_file->f_mapping->host;
5272         unsigned long blocks;
5273         long long isize;
5274
5275         cifs_dbg(FYI, "swap activate\n");
5276
5277         if (!swap_file->f_mapping->a_ops->swap_rw)
5278                 /* Cannot support swap */
5279                 return -EINVAL;
5280
5281         spin_lock(&inode->i_lock);
5282         blocks = inode->i_blocks;
5283         isize = inode->i_size;
5284         spin_unlock(&inode->i_lock);
5285         if (blocks*512 < isize) {
5286                 pr_warn("swap activate: swapfile has holes\n");
5287                 return -EINVAL;
5288         }
5289         *span = sis->pages;
5290
5291         pr_warn_once("Swap support over SMB3 is experimental\n");
5292
5293         /*
5294          * TODO: consider adding ACL (or documenting how) to prevent other
5295          * users (on this or other systems) from reading it
5296          */
5297
5298
5299         /* TODO: add sk_set_memalloc(inet) or similar */
5300
5301         if (cfile)
5302                 cfile->swapfile = true;
5303         /*
5304          * TODO: Since file already open, we can't open with DENY_ALL here
5305          * but we could add call to grab a byte range lock to prevent others
5306          * from reading or writing the file
5307          */
5308
5309         sis->flags |= SWP_FS_OPS;
5310         return add_swap_extent(sis, 0, sis->max, 0);
5311 }
5312
5313 static void cifs_swap_deactivate(struct file *file)
5314 {
5315         struct cifsFileInfo *cfile = file->private_data;
5316
5317         cifs_dbg(FYI, "swap deactivate\n");
5318
5319         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5320
5321         if (cfile)
5322                 cfile->swapfile = false;
5323
5324         /* do we need to unpin (or unlock) the file? */
5325 }
5326
5327 /*
5328  * Mark a page as having been made dirty and thus needing writeback.  We also
5329  * need to pin the cache object to write back to.
5330  */
5331 #ifdef CONFIG_CIFS_FSCACHE
5332 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5333 {
5334         return fscache_dirty_folio(mapping, folio,
5335                                         cifs_inode_cookie(mapping->host));
5336 }
5337 #else
5338 #define cifs_dirty_folio filemap_dirty_folio
5339 #endif
5340
5341 const struct address_space_operations cifs_addr_ops = {
5342         .read_folio = cifs_read_folio,
5343         .readahead = cifs_readahead,
5344         .writepage = cifs_writepage,
5345         .writepages = cifs_writepages,
5346         .write_begin = cifs_write_begin,
5347         .write_end = cifs_write_end,
5348         .dirty_folio = cifs_dirty_folio,
5349         .release_folio = cifs_release_folio,
5350         .direct_IO = cifs_direct_io,
5351         .invalidate_folio = cifs_invalidate_folio,
5352         .launder_folio = cifs_launder_folio,
5353         /*
5354          * TODO: investigate and, if useful, add a cifs_migratePage helper
5355          * (under CONFIG_MIGRATION) in the future, and also investigate
5356          * and add an is_dirty_writeback helper if needed
5357          */
5358         .swap_activate = cifs_swap_activate,
5359         .swap_deactivate = cifs_swap_deactivate,
5360 };
5361
5362 /*
5363  * cifs_readahead requires the server to support a buffer large enough to
5364  * contain the header plus one complete page of data.  Otherwise, we need
5365  * to leave cifs_readahead out of the address space operations.
5366  */
5367 const struct address_space_operations cifs_addr_ops_smallbuf = {
5368         .read_folio = cifs_read_folio,
5369         .writepage = cifs_writepage,
5370         .writepages = cifs_writepages,
5371         .write_begin = cifs_write_begin,
5372         .write_end = cifs_write_end,
5373         .dirty_folio = cifs_dirty_folio,
5374         .release_folio = cifs_release_folio,
5375         .invalidate_folio = cifs_invalidate_folio,
5376         .launder_folio = cifs_launder_folio,
5377 };