GNU Linux-libre 5.4.241-gnu1
lib/iov_iter.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/export.h>
3 #include <linux/bvec.h>
4 #include <linux/uio.h>
5 #include <linux/pagemap.h>
6 #include <linux/slab.h>
7 #include <linux/vmalloc.h>
8 #include <linux/splice.h>
9 #include <net/checksum.h>
10 #include <linux/scatterlist.h>
11
12 #define PIPE_PARANOIA /* for now */
13
14 #define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
15         size_t left;                                    \
16         size_t wanted = n;                              \
17         __p = i->iov;                                   \
18         __v.iov_len = min(n, __p->iov_len - skip);      \
19         if (likely(__v.iov_len)) {                      \
20                 __v.iov_base = __p->iov_base + skip;    \
21                 left = (STEP);                          \
22                 __v.iov_len -= left;                    \
23                 skip += __v.iov_len;                    \
24                 n -= __v.iov_len;                       \
25         } else {                                        \
26                 left = 0;                               \
27         }                                               \
28         while (unlikely(!left && n)) {                  \
29                 __p++;                                  \
30                 __v.iov_len = min(n, __p->iov_len);     \
31                 if (unlikely(!__v.iov_len))             \
32                         continue;                       \
33                 __v.iov_base = __p->iov_base;           \
34                 left = (STEP);                          \
35                 __v.iov_len -= left;                    \
36                 skip = __v.iov_len;                     \
37                 n -= __v.iov_len;                       \
38         }                                               \
39         n = wanted - n;                                 \
40 }
41
42 #define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
43         size_t wanted = n;                              \
44         __p = i->kvec;                                  \
45         __v.iov_len = min(n, __p->iov_len - skip);      \
46         if (likely(__v.iov_len)) {                      \
47                 __v.iov_base = __p->iov_base + skip;    \
48                 (void)(STEP);                           \
49                 skip += __v.iov_len;                    \
50                 n -= __v.iov_len;                       \
51         }                                               \
52         while (unlikely(n)) {                           \
53                 __p++;                                  \
54                 __v.iov_len = min(n, __p->iov_len);     \
55                 if (unlikely(!__v.iov_len))             \
56                         continue;                       \
57                 __v.iov_base = __p->iov_base;           \
58                 (void)(STEP);                           \
59                 skip = __v.iov_len;                     \
60                 n -= __v.iov_len;                       \
61         }                                               \
62         n = wanted;                                     \
63 }
64
65 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
66         struct bvec_iter __start;                       \
67         __start.bi_size = n;                            \
68         __start.bi_bvec_done = skip;                    \
69         __start.bi_idx = 0;                             \
70         for_each_bvec(__v, i->bvec, __bi, __start) {    \
71                 if (!__v.bv_len)                        \
72                         continue;                       \
73                 (void)(STEP);                           \
74         }                                               \
75 }
76
77 #define iterate_all_kinds(i, n, v, I, B, K) {                   \
78         if (likely(n)) {                                        \
79                 size_t skip = i->iov_offset;                    \
80                 if (unlikely(i->type & ITER_BVEC)) {            \
81                         struct bio_vec v;                       \
82                         struct bvec_iter __bi;                  \
83                         iterate_bvec(i, n, v, __bi, skip, (B))  \
84                 } else if (unlikely(i->type & ITER_KVEC)) {     \
85                         const struct kvec *kvec;                \
86                         struct kvec v;                          \
87                         iterate_kvec(i, n, v, kvec, skip, (K))  \
88                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
89                 } else {                                        \
90                         const struct iovec *iov;                \
91                         struct iovec v;                         \
92                         iterate_iovec(i, n, v, iov, skip, (I))  \
93                 }                                               \
94         }                                                       \
95 }
96
97 #define iterate_and_advance(i, n, v, I, B, K) {                 \
98         if (unlikely(i->count < n))                             \
99                 n = i->count;                                   \
100         if (i->count) {                                         \
101                 size_t skip = i->iov_offset;                    \
102                 if (unlikely(i->type & ITER_BVEC)) {            \
103                         const struct bio_vec *bvec = i->bvec;   \
104                         struct bio_vec v;                       \
105                         struct bvec_iter __bi;                  \
106                         iterate_bvec(i, n, v, __bi, skip, (B))  \
107                         i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
108                         i->nr_segs -= i->bvec - bvec;           \
109                         skip = __bi.bi_bvec_done;               \
110                 } else if (unlikely(i->type & ITER_KVEC)) {     \
111                         const struct kvec *kvec;                \
112                         struct kvec v;                          \
113                         iterate_kvec(i, n, v, kvec, skip, (K))  \
114                         if (skip == kvec->iov_len) {            \
115                                 kvec++;                         \
116                                 skip = 0;                       \
117                         }                                       \
118                         i->nr_segs -= kvec - i->kvec;           \
119                         i->kvec = kvec;                         \
120                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
121                         skip += n;                              \
122                 } else {                                        \
123                         const struct iovec *iov;                \
124                         struct iovec v;                         \
125                         iterate_iovec(i, n, v, iov, skip, (I))  \
126                         if (skip == iov->iov_len) {             \
127                                 iov++;                          \
128                                 skip = 0;                       \
129                         }                                       \
130                         i->nr_segs -= iov - i->iov;             \
131                         i->iov = iov;                           \
132                 }                                               \
133                 i->count -= n;                                  \
134                 i->iov_offset = skip;                           \
135         }                                                       \
136 }
137
138 static int copyout(void __user *to, const void *from, size_t n)
139 {
140         if (access_ok(to, n)) {
141                 kasan_check_read(from, n);
142                 n = raw_copy_to_user(to, from, n);
143         }
144         return n;
145 }
146
147 static int copyin(void *to, const void __user *from, size_t n)
148 {
149         if (access_ok(from, n)) {
150                 kasan_check_write(to, n);
151                 n = raw_copy_from_user(to, from, n);
152         }
153         return n;
154 }
155
156 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
157                          struct iov_iter *i)
158 {
159         size_t skip, copy, left, wanted;
160         const struct iovec *iov;
161         char __user *buf;
162         void *kaddr, *from;
163
164         if (unlikely(bytes > i->count))
165                 bytes = i->count;
166
167         if (unlikely(!bytes))
168                 return 0;
169
170         might_fault();
171         wanted = bytes;
172         iov = i->iov;
173         skip = i->iov_offset;
174         buf = iov->iov_base + skip;
175         copy = min(bytes, iov->iov_len - skip);
176
177         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
178                 kaddr = kmap_atomic(page);
179                 from = kaddr + offset;
180
181                 /* first chunk, usually the only one */
182                 left = copyout(buf, from, copy);
183                 copy -= left;
184                 skip += copy;
185                 from += copy;
186                 bytes -= copy;
187
188                 while (unlikely(!left && bytes)) {
189                         iov++;
190                         buf = iov->iov_base;
191                         copy = min(bytes, iov->iov_len);
192                         left = copyout(buf, from, copy);
193                         copy -= left;
194                         skip = copy;
195                         from += copy;
196                         bytes -= copy;
197                 }
198                 if (likely(!bytes)) {
199                         kunmap_atomic(kaddr);
200                         goto done;
201                 }
202                 offset = from - kaddr;
203                 buf += copy;
204                 kunmap_atomic(kaddr);
205                 copy = min(bytes, iov->iov_len - skip);
206         }
207         /* Too bad - revert to non-atomic kmap */
208
209         kaddr = kmap(page);
210         from = kaddr + offset;
211         left = copyout(buf, from, copy);
212         copy -= left;
213         skip += copy;
214         from += copy;
215         bytes -= copy;
216         while (unlikely(!left && bytes)) {
217                 iov++;
218                 buf = iov->iov_base;
219                 copy = min(bytes, iov->iov_len);
220                 left = copyout(buf, from, copy);
221                 copy -= left;
222                 skip = copy;
223                 from += copy;
224                 bytes -= copy;
225         }
226         kunmap(page);
227
228 done:
229         if (skip == iov->iov_len) {
230                 iov++;
231                 skip = 0;
232         }
233         i->count -= wanted - bytes;
234         i->nr_segs -= iov - i->iov;
235         i->iov = iov;
236         i->iov_offset = skip;
237         return wanted - bytes;
238 }
239
240 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
241                          struct iov_iter *i)
242 {
243         size_t skip, copy, left, wanted;
244         const struct iovec *iov;
245         char __user *buf;
246         void *kaddr, *to;
247
248         if (unlikely(bytes > i->count))
249                 bytes = i->count;
250
251         if (unlikely(!bytes))
252                 return 0;
253
254         might_fault();
255         wanted = bytes;
256         iov = i->iov;
257         skip = i->iov_offset;
258         buf = iov->iov_base + skip;
259         copy = min(bytes, iov->iov_len - skip);
260
261         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
262                 kaddr = kmap_atomic(page);
263                 to = kaddr + offset;
264
265                 /* first chunk, usually the only one */
266                 left = copyin(to, buf, copy);
267                 copy -= left;
268                 skip += copy;
269                 to += copy;
270                 bytes -= copy;
271
272                 while (unlikely(!left && bytes)) {
273                         iov++;
274                         buf = iov->iov_base;
275                         copy = min(bytes, iov->iov_len);
276                         left = copyin(to, buf, copy);
277                         copy -= left;
278                         skip = copy;
279                         to += copy;
280                         bytes -= copy;
281                 }
282                 if (likely(!bytes)) {
283                         kunmap_atomic(kaddr);
284                         goto done;
285                 }
286                 offset = to - kaddr;
287                 buf += copy;
288                 kunmap_atomic(kaddr);
289                 copy = min(bytes, iov->iov_len - skip);
290         }
291         /* Too bad - revert to non-atomic kmap */
292
293         kaddr = kmap(page);
294         to = kaddr + offset;
295         left = copyin(to, buf, copy);
296         copy -= left;
297         skip += copy;
298         to += copy;
299         bytes -= copy;
300         while (unlikely(!left && bytes)) {
301                 iov++;
302                 buf = iov->iov_base;
303                 copy = min(bytes, iov->iov_len);
304                 left = copyin(to, buf, copy);
305                 copy -= left;
306                 skip = copy;
307                 to += copy;
308                 bytes -= copy;
309         }
310         kunmap(page);
311
312 done:
313         if (skip == iov->iov_len) {
314                 iov++;
315                 skip = 0;
316         }
317         i->count -= wanted - bytes;
318         i->nr_segs -= iov - i->iov;
319         i->iov = iov;
320         i->iov_offset = skip;
321         return wanted - bytes;
322 }
323
324 #ifdef PIPE_PARANOIA
325 static bool sanity(const struct iov_iter *i)
326 {
327         struct pipe_inode_info *pipe = i->pipe;
328         int idx = i->idx;
329         int next = pipe->curbuf + pipe->nrbufs;
330         if (i->iov_offset) {
331                 struct pipe_buffer *p;
332                 if (unlikely(!pipe->nrbufs))
333                         goto Bad;       // pipe must be non-empty
334                 if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
335                         goto Bad;       // must be at the last buffer...
336
337                 p = &pipe->bufs[idx];
338                 if (unlikely(p->offset + p->len != i->iov_offset))
339                         goto Bad;       // ... at the end of segment
340         } else {
341                 if (idx != (next & (pipe->buffers - 1)))
342                         goto Bad;       // must be right after the last buffer
343         }
344         return true;
345 Bad:
346         printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
347         printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
348                         pipe->curbuf, pipe->nrbufs, pipe->buffers);
349         for (idx = 0; idx < pipe->buffers; idx++)
350                 printk(KERN_ERR "[%p %p %d %d]\n",
351                         pipe->bufs[idx].ops,
352                         pipe->bufs[idx].page,
353                         pipe->bufs[idx].offset,
354                         pipe->bufs[idx].len);
355         WARN_ON(1);
356         return false;
357 }
358 #else
359 #define sanity(i) true
360 #endif
361
362 static inline int next_idx(int idx, struct pipe_inode_info *pipe)
363 {
364         return (idx + 1) & (pipe->buffers - 1);
365 }
366
367 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
368                          struct iov_iter *i)
369 {
370         struct pipe_inode_info *pipe = i->pipe;
371         struct pipe_buffer *buf;
372         size_t off;
373         int idx;
374
375         if (unlikely(bytes > i->count))
376                 bytes = i->count;
377
378         if (unlikely(!bytes))
379                 return 0;
380
381         if (!sanity(i))
382                 return 0;
383
384         off = i->iov_offset;
385         idx = i->idx;
386         buf = &pipe->bufs[idx];
387         if (off) {
388                 if (offset == off && buf->page == page) {
389                         /* merge with the last one */
390                         buf->len += bytes;
391                         i->iov_offset += bytes;
392                         goto out;
393                 }
394                 idx = next_idx(idx, pipe);
395                 buf = &pipe->bufs[idx];
396         }
397         if (idx == pipe->curbuf && pipe->nrbufs)
398                 return 0;
399         pipe->nrbufs++;
400         buf->ops = &page_cache_pipe_buf_ops;
401         buf->flags = 0;
402         get_page(buf->page = page);
403         buf->offset = offset;
404         buf->len = bytes;
405         i->iov_offset = offset + bytes;
406         i->idx = idx;
407 out:
408         i->count -= bytes;
409         return bytes;
410 }
411
412 /*
413  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
414  * bytes.  For each iovec, fault in each page that constitutes the iovec.
415  *
416  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
417  * because it is an invalid address).
418  */
419 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
420 {
421         size_t skip = i->iov_offset;
422         const struct iovec *iov;
423         int err;
424         struct iovec v;
425
426         if (iter_is_iovec(i)) {
427                 iterate_iovec(i, bytes, v, iov, skip, ({
428                         err = fault_in_pages_readable(v.iov_base, v.iov_len);
429                         if (unlikely(err))
430                         return err;
431                 0;}))
432         }
433         return 0;
434 }
435 EXPORT_SYMBOL(iov_iter_fault_in_readable);
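/*
 * Illustrative usage sketch (editor's addition, not part of this file):
 * a typical buffered-write path pre-faults the user pages before entering
 * a section that must not take page faults.  The helper name below is a
 * hypothetical placeholder.
 *
 *	static ssize_t do_buffered_write(struct iov_iter *from, size_t chunk)
 *	{
 *		if (iov_iter_fault_in_readable(from, chunk))
 *			return -EFAULT;		// user memory inaccessible
 *		// ...map the destination page, then copy with page faults
 *		// disabled, e.g. via iov_iter_copy_from_user_atomic()...
 *		return chunk;
 *	}
 */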
436
437 void iov_iter_init(struct iov_iter *i, unsigned int direction,
438                         const struct iovec *iov, unsigned long nr_segs,
439                         size_t count)
440 {
441         WARN_ON(direction & ~(READ | WRITE));
442         direction &= READ | WRITE;
443
444         /* It will get better.  Eventually... */
445         if (uaccess_kernel()) {
446                 i->type = ITER_KVEC | direction;
447                 i->kvec = (struct kvec *)iov;
448         } else {
449                 i->type = ITER_IOVEC | direction;
450                 i->iov = iov;
451         }
452         i->nr_segs = nr_segs;
453         i->iov_offset = 0;
454         i->count = count;
455 }
456 EXPORT_SYMBOL(iov_iter_init);
457
458 static void memzero_page(struct page *page, size_t offset, size_t len)
459 {
460         char *addr = kmap_atomic(page);
461         memset(addr + offset, 0, len);
462         kunmap_atomic(addr);
463 }
464
465 static inline bool allocated(struct pipe_buffer *buf)
466 {
467         return buf->ops == &default_pipe_buf_ops;
468 }
469
470 static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
471 {
472         size_t off = i->iov_offset;
473         int idx = i->idx;
474         if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
475                 idx = next_idx(idx, i->pipe);
476                 off = 0;
477         }
478         *idxp = idx;
479         *offp = off;
480 }
481
482 static size_t push_pipe(struct iov_iter *i, size_t size,
483                         int *idxp, size_t *offp)
484 {
485         struct pipe_inode_info *pipe = i->pipe;
486         size_t off;
487         int idx;
488         ssize_t left;
489
490         if (unlikely(size > i->count))
491                 size = i->count;
492         if (unlikely(!size))
493                 return 0;
494
495         left = size;
496         data_start(i, &idx, &off);
497         *idxp = idx;
498         *offp = off;
499         if (off) {
500                 left -= PAGE_SIZE - off;
501                 if (left <= 0) {
502                         pipe->bufs[idx].len += size;
503                         return size;
504                 }
505                 pipe->bufs[idx].len = PAGE_SIZE;
506                 idx = next_idx(idx, pipe);
507         }
508         while (idx != pipe->curbuf || !pipe->nrbufs) {
509                 struct page *page = alloc_page(GFP_USER);
510                 if (!page)
511                         break;
512                 pipe->nrbufs++;
513                 pipe->bufs[idx].ops = &default_pipe_buf_ops;
514                 pipe->bufs[idx].flags = 0;
515                 pipe->bufs[idx].page = page;
516                 pipe->bufs[idx].offset = 0;
517                 if (left <= PAGE_SIZE) {
518                         pipe->bufs[idx].len = left;
519                         return size;
520                 }
521                 pipe->bufs[idx].len = PAGE_SIZE;
522                 left -= PAGE_SIZE;
523                 idx = next_idx(idx, pipe);
524         }
525         return size - left;
526 }
527
528 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
529                                 struct iov_iter *i)
530 {
531         struct pipe_inode_info *pipe = i->pipe;
532         size_t n, off;
533         int idx;
534
535         if (!sanity(i))
536                 return 0;
537
538         bytes = n = push_pipe(i, bytes, &idx, &off);
539         if (unlikely(!n))
540                 return 0;
541         for ( ; n; idx = next_idx(idx, pipe), off = 0) {
542                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
543                 memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
544                 i->idx = idx;
545                 i->iov_offset = off + chunk;
546                 n -= chunk;
547                 addr += chunk;
548         }
549         i->count -= bytes;
550         return bytes;
551 }
552
553 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
554                               __wsum sum, size_t off)
555 {
556         __wsum next = csum_partial_copy_nocheck(from, to, len, 0);
557         return csum_block_add(sum, next, off);
558 }
559
560 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
561                                          struct csum_state *csstate,
562                                          struct iov_iter *i)
563 {
564         struct pipe_inode_info *pipe = i->pipe;
565         __wsum sum = csstate->csum;
566         size_t off = csstate->off;
567         size_t n, r;
568         int idx;
569
570         if (!sanity(i))
571                 return 0;
572
573         bytes = n = push_pipe(i, bytes, &idx, &r);
574         if (unlikely(!n))
575                 return 0;
576         for ( ; n; idx = next_idx(idx, pipe), r = 0) {
577                 size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
578                 char *p = kmap_atomic(pipe->bufs[idx].page);
579                 sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
580                 kunmap_atomic(p);
581                 i->idx = idx;
582                 i->iov_offset = r + chunk;
583                 n -= chunk;
584                 off += chunk;
585                 addr += chunk;
586         }
587         i->count -= bytes;
588         csstate->csum = sum;
589         csstate->off = off;
590         return bytes;
591 }
592
593 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
594 {
595         const char *from = addr;
596         if (unlikely(iov_iter_is_pipe(i)))
597                 return copy_pipe_to_iter(addr, bytes, i);
598         if (iter_is_iovec(i))
599                 might_fault();
600         iterate_and_advance(i, bytes, v,
601                 copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
602                 memcpy_to_page(v.bv_page, v.bv_offset,
603                                (from += v.bv_len) - v.bv_len, v.bv_len),
604                 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
605         )
606
607         return bytes;
608 }
609 EXPORT_SYMBOL(_copy_to_iter);
610
611 #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
612 static int copyout_mcsafe(void __user *to, const void *from, size_t n)
613 {
614         if (access_ok(to, n)) {
615                 kasan_check_read(from, n);
616                 n = copy_to_user_mcsafe((__force void *) to, from, n);
617         }
618         return n;
619 }
620
621 static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
622                 const char *from, size_t len)
623 {
624         unsigned long ret;
625         char *to;
626
627         to = kmap_atomic(page);
628         ret = memcpy_mcsafe(to + offset, from, len);
629         kunmap_atomic(to);
630
631         return ret;
632 }
633
634 static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
635                                 struct iov_iter *i)
636 {
637         struct pipe_inode_info *pipe = i->pipe;
638         size_t n, off, xfer = 0;
639         int idx;
640
641         if (!sanity(i))
642                 return 0;
643
644         bytes = n = push_pipe(i, bytes, &idx, &off);
645         if (unlikely(!n))
646                 return 0;
647         for ( ; n; idx = next_idx(idx, pipe), off = 0) {
648                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
649                 unsigned long rem;
650
651                 rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
652                                 chunk);
653                 i->idx = idx;
654                 i->iov_offset = off + chunk - rem;
655                 xfer += chunk - rem;
656                 if (rem)
657                         break;
658                 n -= chunk;
659                 addr += chunk;
660         }
661         i->count -= xfer;
662         return xfer;
663 }
664
665 /**
666  * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
667  * @addr: source kernel address
668  * @bytes: total transfer length
669  * @iter: destination iterator
670  *
671  * The pmem driver arranges for filesystem-dax to use this facility via
672  * dax_copy_to_iter() for protecting read/write to persistent memory.
673  * Unless / until an architecture can guarantee identical performance
674  * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
675  * performance regression to switch more users to the mcsafe version.
676  *
677  * Otherwise, the main differences between this and typical _copy_to_iter() are:
678  *
679  * * Typical tail/residue handling after a fault retries the copy
680  *   byte-by-byte until the fault happens again. Re-triggering machine
681  *   checks is potentially fatal so the implementation uses source
682  *   alignment and poison alignment assumptions to avoid re-triggering
683  *   hardware exceptions.
684  *
685  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
686  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
687  *   a short copy.
688  *
689  * See MCSAFE_TEST for self-test.
690  */
691 size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
692 {
693         const char *from = addr;
694         unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
695
696         if (unlikely(iov_iter_is_pipe(i)))
697                 return copy_pipe_to_iter_mcsafe(addr, bytes, i);
698         if (iter_is_iovec(i))
699                 might_fault();
700         iterate_and_advance(i, bytes, v,
701                 copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
702                 ({
703                 rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
704                                (from += v.bv_len) - v.bv_len, v.bv_len);
705                 if (rem) {
706                         curr_addr = (unsigned long) from;
707                         bytes = curr_addr - s_addr - rem;
708                         return bytes;
709                 }
710                 }),
711                 ({
712                 rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
713                                 v.iov_len);
714                 if (rem) {
715                         curr_addr = (unsigned long) from;
716                         bytes = curr_addr - s_addr - rem;
717                         return bytes;
718                 }
719                 })
720         )
721
722         return bytes;
723 }
724 EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
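/*
 * Illustrative usage sketch (editor's addition, not part of this file):
 * because _copy_to_iter_mcsafe() may return a short count when the source
 * read hits poisoned memory, a hypothetical caller treats anything less
 * than the requested length as a partial transfer:
 *
 *	size_t done = _copy_to_iter_mcsafe(kaddr, len, iter);
 *
 *	if (done != len)
 *		return done ? done : -EIO;	// nothing copied: report error
 */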
725 #endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
726
727 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
728 {
729         char *to = addr;
730         if (unlikely(iov_iter_is_pipe(i))) {
731                 WARN_ON(1);
732                 return 0;
733         }
734         if (iter_is_iovec(i))
735                 might_fault();
736         iterate_and_advance(i, bytes, v,
737                 copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
738                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
739                                  v.bv_offset, v.bv_len),
740                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
741         )
742
743         return bytes;
744 }
745 EXPORT_SYMBOL(_copy_from_iter);
746
747 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
748 {
749         char *to = addr;
750         if (unlikely(iov_iter_is_pipe(i))) {
751                 WARN_ON(1);
752                 return false;
753         }
754         if (unlikely(i->count < bytes))
755                 return false;
756
757         if (iter_is_iovec(i))
758                 might_fault();
759         iterate_all_kinds(i, bytes, v, ({
760                 if (copyin((to += v.iov_len) - v.iov_len,
761                                       v.iov_base, v.iov_len))
762                         return false;
763                 0;}),
764                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
765                                  v.bv_offset, v.bv_len),
766                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
767         )
768
769         iov_iter_advance(i, bytes);
770         return true;
771 }
772 EXPORT_SYMBOL(_copy_from_iter_full);
773
774 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
775 {
776         char *to = addr;
777         if (unlikely(iov_iter_is_pipe(i))) {
778                 WARN_ON(1);
779                 return 0;
780         }
781         iterate_and_advance(i, bytes, v,
782                 __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
783                                          v.iov_base, v.iov_len),
784                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
785                                  v.bv_offset, v.bv_len),
786                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
787         )
788
789         return bytes;
790 }
791 EXPORT_SYMBOL(_copy_from_iter_nocache);
792
793 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
794 /**
795  * _copy_from_iter_flushcache - write destination through cpu cache
796  * @addr: destination kernel address
797  * @bytes: total transfer length
798  * @iter: source iterator
799  *
800  * The pmem driver arranges for filesystem-dax to use this facility via
801  * dax_copy_from_iter() for ensuring that writes to persistent memory
802  * are flushed through the CPU cache. It is differentiated from
803  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
804  * all iterator types. _copy_from_iter_nocache() only attempts to
805  * bypass the cache for the ITER_IOVEC case, and on some archs may use
806  * instructions that strand dirty-data in the cache.
807  */
808 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
809 {
810         char *to = addr;
811         if (unlikely(iov_iter_is_pipe(i))) {
812                 WARN_ON(1);
813                 return 0;
814         }
815         iterate_and_advance(i, bytes, v,
816                 __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
817                                          v.iov_base, v.iov_len),
818                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
819                                  v.bv_offset, v.bv_len),
820                 memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
821                         v.iov_len)
822         )
823
824         return bytes;
825 }
826 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
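/*
 * Illustrative usage sketch (editor's addition, not part of this file):
 * a pmem-style dax ->copy_from_iter() method would pick the flushcache
 * variant so that writes reach persistent media rather than lingering
 * dirty in the CPU cache.  The hook below is a hypothetical outline only.
 *
 *	static size_t pmem_copy_from_iter(struct dax_device *dax_dev,
 *			pgoff_t pgoff, void *addr, size_t bytes,
 *			struct iov_iter *i)
 *	{
 *		return _copy_from_iter_flushcache(addr, bytes, i);
 *	}
 */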
827 #endif
828
829 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
830 {
831         char *to = addr;
832         if (unlikely(iov_iter_is_pipe(i))) {
833                 WARN_ON(1);
834                 return false;
835         }
836         if (unlikely(i->count < bytes))
837                 return false;
838         iterate_all_kinds(i, bytes, v, ({
839                 if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
840                                              v.iov_base, v.iov_len))
841                         return false;
842                 0;}),
843                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
844                                  v.bv_offset, v.bv_len),
845                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
846         )
847
848         iov_iter_advance(i, bytes);
849         return true;
850 }
851 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
852
853 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
854 {
855         struct page *head;
856         size_t v = n + offset;
857
858         /*
859          * The general case needs to access the page order in order
860          * to compute the page size.
861          * However, we mostly deal with order-0 pages and thus can
862          * avoid a possible cache line miss for requests that fit all
863          * page orders.
864          */
865         if (n <= v && v <= PAGE_SIZE)
866                 return true;
867
868         head = compound_head(page);
869         v += (page - head) << PAGE_SHIFT;
870
871         if (likely(n <= v && v <= (page_size(head))))
872                 return true;
873         WARN_ON(1);
874         return false;
875 }
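/*
 * Worked example (editor's addition, not part of this file): with
 * PAGE_SIZE == 4096, a request of n = 512 at offset = 1024 gives v = 1536,
 * so n <= v && v <= PAGE_SIZE holds and the fast path accepts it without
 * looking at the compound head.  A request spilling past the first page
 * (say offset = 3584, n = 1024, so v = 4608) falls through to the
 * page_size(head) check instead.
 */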
876
877 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
878                          struct iov_iter *i)
879 {
880         if (unlikely(!page_copy_sane(page, offset, bytes)))
881                 return 0;
882         if (i->type & (ITER_BVEC|ITER_KVEC)) {
883                 void *kaddr = kmap_atomic(page);
884                 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
885                 kunmap_atomic(kaddr);
886                 return wanted;
887         } else if (unlikely(iov_iter_is_discard(i))) {
888                 if (unlikely(i->count < bytes))
889                         bytes = i->count;
890                 i->count -= bytes;
891                 return bytes;
892         } else if (likely(!iov_iter_is_pipe(i)))
893                 return copy_page_to_iter_iovec(page, offset, bytes, i);
894         else
895                 return copy_page_to_iter_pipe(page, offset, bytes, i);
896 }
897 EXPORT_SYMBOL(copy_page_to_iter);
898
899 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
900                          struct iov_iter *i)
901 {
902         if (unlikely(!page_copy_sane(page, offset, bytes)))
903                 return 0;
904         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
905                 WARN_ON(1);
906                 return 0;
907         }
908         if (i->type & (ITER_BVEC|ITER_KVEC)) {
909                 void *kaddr = kmap_atomic(page);
910                 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
911                 kunmap_atomic(kaddr);
912                 return wanted;
913         } else
914                 return copy_page_from_iter_iovec(page, offset, bytes, i);
915 }
916 EXPORT_SYMBOL(copy_page_from_iter);
917
918 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
919 {
920         struct pipe_inode_info *pipe = i->pipe;
921         size_t n, off;
922         int idx;
923
924         if (!sanity(i))
925                 return 0;
926
927         bytes = n = push_pipe(i, bytes, &idx, &off);
928         if (unlikely(!n))
929                 return 0;
930
931         for ( ; n; idx = next_idx(idx, pipe), off = 0) {
932                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
933                 memzero_page(pipe->bufs[idx].page, off, chunk);
934                 i->idx = idx;
935                 i->iov_offset = off + chunk;
936                 n -= chunk;
937         }
938         i->count -= bytes;
939         return bytes;
940 }
941
942 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
943 {
944         if (unlikely(iov_iter_is_pipe(i)))
945                 return pipe_zero(bytes, i);
946         iterate_and_advance(i, bytes, v,
947                 clear_user(v.iov_base, v.iov_len),
948                 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
949                 memset(v.iov_base, 0, v.iov_len)
950         )
951
952         return bytes;
953 }
954 EXPORT_SYMBOL(iov_iter_zero);
955
956 size_t iov_iter_copy_from_user_atomic(struct page *page,
957                 struct iov_iter *i, unsigned long offset, size_t bytes)
958 {
959         char *kaddr = kmap_atomic(page), *p = kaddr + offset;
960         if (unlikely(!page_copy_sane(page, offset, bytes))) {
961                 kunmap_atomic(kaddr);
962                 return 0;
963         }
964         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
965                 kunmap_atomic(kaddr);
966                 WARN_ON(1);
967                 return 0;
968         }
969         iterate_all_kinds(i, bytes, v,
970                 copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
971                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
972                                  v.bv_offset, v.bv_len),
973                 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
974         )
975         kunmap_atomic(kaddr);
976         return bytes;
977 }
978 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
979
980 static inline void pipe_truncate(struct iov_iter *i)
981 {
982         struct pipe_inode_info *pipe = i->pipe;
983         if (pipe->nrbufs) {
984                 size_t off = i->iov_offset;
985                 int idx = i->idx;
986                 int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
987                 if (off) {
988                         pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
989                         idx = next_idx(idx, pipe);
990                         nrbufs++;
991                 }
992                 while (pipe->nrbufs > nrbufs) {
993                         pipe_buf_release(pipe, &pipe->bufs[idx]);
994                         idx = next_idx(idx, pipe);
995                         pipe->nrbufs--;
996                 }
997         }
998 }
999
1000 static void pipe_advance(struct iov_iter *i, size_t size)
1001 {
1002         struct pipe_inode_info *pipe = i->pipe;
1003         if (unlikely(i->count < size))
1004                 size = i->count;
1005         if (size) {
1006                 struct pipe_buffer *buf;
1007                 size_t off = i->iov_offset, left = size;
1008                 int idx = i->idx;
1009                 if (off) /* make it relative to the beginning of buffer */
1010                         left += off - pipe->bufs[idx].offset;
1011                 while (1) {
1012                         buf = &pipe->bufs[idx];
1013                         if (left <= buf->len)
1014                                 break;
1015                         left -= buf->len;
1016                         idx = next_idx(idx, pipe);
1017                 }
1018                 i->idx = idx;
1019                 i->iov_offset = buf->offset + left;
1020         }
1021         i->count -= size;
1022         /* ... and discard everything past that point */
1023         pipe_truncate(i);
1024 }
1025
1026 void iov_iter_advance(struct iov_iter *i, size_t size)
1027 {
1028         if (unlikely(iov_iter_is_pipe(i))) {
1029                 pipe_advance(i, size);
1030                 return;
1031         }
1032         if (unlikely(iov_iter_is_discard(i))) {
1033                 i->count -= size;
1034                 return;
1035         }
1036         iterate_and_advance(i, size, v, 0, 0, 0)
1037 }
1038 EXPORT_SYMBOL(iov_iter_advance);
1039
1040 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1041 {
1042         if (!unroll)
1043                 return;
1044         if (WARN_ON(unroll > MAX_RW_COUNT))
1045                 return;
1046         i->count += unroll;
1047         if (unlikely(iov_iter_is_pipe(i))) {
1048                 struct pipe_inode_info *pipe = i->pipe;
1049                 int idx = i->idx;
1050                 size_t off = i->iov_offset;
1051                 while (1) {
1052                         size_t n = off - pipe->bufs[idx].offset;
1053                         if (unroll < n) {
1054                                 off -= unroll;
1055                                 break;
1056                         }
1057                         unroll -= n;
1058                         if (!unroll && idx == i->start_idx) {
1059                                 off = 0;
1060                                 break;
1061                         }
1062                         if (!idx--)
1063                                 idx = pipe->buffers - 1;
1064                         off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
1065                 }
1066                 i->iov_offset = off;
1067                 i->idx = idx;
1068                 pipe_truncate(i);
1069                 return;
1070         }
1071         if (unlikely(iov_iter_is_discard(i)))
1072                 return;
1073         if (unroll <= i->iov_offset) {
1074                 i->iov_offset -= unroll;
1075                 return;
1076         }
1077         unroll -= i->iov_offset;
1078         if (iov_iter_is_bvec(i)) {
1079                 const struct bio_vec *bvec = i->bvec;
1080                 while (1) {
1081                         size_t n = (--bvec)->bv_len;
1082                         i->nr_segs++;
1083                         if (unroll <= n) {
1084                                 i->bvec = bvec;
1085                                 i->iov_offset = n - unroll;
1086                                 return;
1087                         }
1088                         unroll -= n;
1089                 }
1090         } else { /* same logics for iovec and kvec */
1091                 const struct iovec *iov = i->iov;
1092                 while (1) {
1093                         size_t n = (--iov)->iov_len;
1094                         i->nr_segs++;
1095                         if (unroll <= n) {
1096                                 i->iov = iov;
1097                                 i->iov_offset = n - unroll;
1098                                 return;
1099                         }
1100                         unroll -= n;
1101                 }
1102         }
1103 }
1104 EXPORT_SYMBOL(iov_iter_revert);
1105
1106 /*
1107  * Return the count of just the current iov_iter segment.
1108  */
1109 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1110 {
1111         if (unlikely(iov_iter_is_pipe(i)))
1112                 return i->count;        // it is a silly place, anyway
1113         if (i->nr_segs == 1)
1114                 return i->count;
1115         if (unlikely(iov_iter_is_discard(i)))
1116                 return i->count;
1117         else if (iov_iter_is_bvec(i))
1118                 return min(i->count, i->bvec->bv_len - i->iov_offset);
1119         else
1120                 return min(i->count, i->iov->iov_len - i->iov_offset);
1121 }
1122 EXPORT_SYMBOL(iov_iter_single_seg_count);
1123
1124 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1125                         const struct kvec *kvec, unsigned long nr_segs,
1126                         size_t count)
1127 {
1128         WARN_ON(direction & ~(READ | WRITE));
1129         i->type = ITER_KVEC | (direction & (READ | WRITE));
1130         i->kvec = kvec;
1131         i->nr_segs = nr_segs;
1132         i->iov_offset = 0;
1133         i->count = count;
1134 }
1135 EXPORT_SYMBOL(iov_iter_kvec);
1136
1137 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1138                         const struct bio_vec *bvec, unsigned long nr_segs,
1139                         size_t count)
1140 {
1141         WARN_ON(direction & ~(READ | WRITE));
1142         i->type = ITER_BVEC | (direction & (READ | WRITE));
1143         i->bvec = bvec;
1144         i->nr_segs = nr_segs;
1145         i->iov_offset = 0;
1146         i->count = count;
1147 }
1148 EXPORT_SYMBOL(iov_iter_bvec);
1149
1150 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1151                         struct pipe_inode_info *pipe,
1152                         size_t count)
1153 {
1154         BUG_ON(direction != READ);
1155         WARN_ON(pipe->nrbufs == pipe->buffers);
1156         i->type = ITER_PIPE | READ;
1157         i->pipe = pipe;
1158         i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1159         i->iov_offset = 0;
1160         i->count = count;
1161         i->start_idx = i->idx;
1162 }
1163 EXPORT_SYMBOL(iov_iter_pipe);
1164
1165 /**
1166  * iov_iter_discard - Initialise an I/O iterator that discards data
1167  * @i: The iterator to initialise.
1168  * @direction: The direction of the transfer.
1169  * @count: The size of the I/O buffer in bytes.
1170  *
1171  * Set up an I/O iterator that just discards everything that's written to it.
1172  * It's only available as a READ iterator.
1173  */
1174 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1175 {
1176         BUG_ON(direction != READ);
1177         i->type = ITER_DISCARD | READ;
1178         i->count = count;
1179         i->iov_offset = 0;
1180 }
1181 EXPORT_SYMBOL(iov_iter_discard);
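/*
 * Illustrative usage sketch (editor's addition, not part of this file):
 * a discard iterator lets a caller consume and drop data when only the
 * byte count matters, e.g. skipping over part of a source:
 *
 *	struct iov_iter trash;
 *
 *	iov_iter_discard(&trash, READ, bytes_to_skip);
 *	// copy_page_to_iter(page, offset, n, &trash) now just decrements
 *	// trash.count and reports n bytes "copied" without touching memory.
 */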
1182
1183 unsigned long iov_iter_alignment(const struct iov_iter *i)
1184 {
1185         unsigned long res = 0;
1186         size_t size = i->count;
1187
1188         if (unlikely(iov_iter_is_pipe(i))) {
1189                 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
1190                         return size | i->iov_offset;
1191                 return size;
1192         }
1193         iterate_all_kinds(i, size, v,
1194                 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
1195                 res |= v.bv_offset | v.bv_len,
1196                 res |= (unsigned long)v.iov_base | v.iov_len
1197         )
1198         return res;
1199 }
1200 EXPORT_SYMBOL(iov_iter_alignment);
1201
1202 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1203 {
1204         unsigned long res = 0;
1205         size_t size = i->count;
1206
1207         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1208                 WARN_ON(1);
1209                 return ~0U;
1210         }
1211
1212         iterate_all_kinds(i, size, v,
1213                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1214                         (size != v.iov_len ? size : 0), 0),
1215                 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1216                         (size != v.bv_len ? size : 0)),
1217                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1218                         (size != v.iov_len ? size : 0))
1219                 );
1220         return res;
1221 }
1222 EXPORT_SYMBOL(iov_iter_gap_alignment);
1223
1224 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1225                                 size_t maxsize,
1226                                 struct page **pages,
1227                                 int idx,
1228                                 size_t *start)
1229 {
1230         struct pipe_inode_info *pipe = i->pipe;
1231         ssize_t n = push_pipe(i, maxsize, &idx, start);
1232         if (!n)
1233                 return -EFAULT;
1234
1235         maxsize = n;
1236         n += *start;
1237         while (n > 0) {
1238                 get_page(*pages++ = pipe->bufs[idx].page);
1239                 idx = next_idx(idx, pipe);
1240                 n -= PAGE_SIZE;
1241         }
1242
1243         return maxsize;
1244 }
1245
1246 static ssize_t pipe_get_pages(struct iov_iter *i,
1247                    struct page **pages, size_t maxsize, unsigned maxpages,
1248                    size_t *start)
1249 {
1250         unsigned npages;
1251         size_t capacity;
1252         int idx;
1253
1254         if (!maxsize)
1255                 return 0;
1256
1257         if (!sanity(i))
1258                 return -EFAULT;
1259
1260         data_start(i, &idx, start);
1261         /* some of this one + all after this one */
1262         npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1263         capacity = min(npages,maxpages) * PAGE_SIZE - *start;
1264
1265         return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
1266 }
1267
1268 ssize_t iov_iter_get_pages(struct iov_iter *i,
1269                    struct page **pages, size_t maxsize, unsigned maxpages,
1270                    size_t *start)
1271 {
1272         if (maxsize > i->count)
1273                 maxsize = i->count;
1274
1275         if (unlikely(iov_iter_is_pipe(i)))
1276                 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1277         if (unlikely(iov_iter_is_discard(i)))
1278                 return -EFAULT;
1279
1280         iterate_all_kinds(i, maxsize, v, ({
1281                 unsigned long addr = (unsigned long)v.iov_base;
1282                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1283                 int n;
1284                 int res;
1285
1286                 if (len > maxpages * PAGE_SIZE)
1287                         len = maxpages * PAGE_SIZE;
1288                 addr &= ~(PAGE_SIZE - 1);
1289                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1290                 res = get_user_pages_fast(addr, n,
1291                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1292                                 pages);
1293                 if (unlikely(res <= 0))
1294                         return res;
1295                 return (res == n ? len : res * PAGE_SIZE) - *start;
1296         0;}),({
1297                 /* can't be more than PAGE_SIZE */
1298                 *start = v.bv_offset;
1299                 get_page(*pages = v.bv_page);
1300                 return v.bv_len;
1301         }),({
1302                 return -EFAULT;
1303         })
1304         )
1305         return 0;
1306 }
1307 EXPORT_SYMBOL(iov_iter_get_pages);
1308
1309 static struct page **get_pages_array(size_t n)
1310 {
1311         return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1312 }
1313
1314 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1315                    struct page ***pages, size_t maxsize,
1316                    size_t *start)
1317 {
1318         struct page **p;
1319         ssize_t n;
1320         int idx;
1321         int npages;
1322
1323         if (!maxsize)
1324                 return 0;
1325
1326         if (!sanity(i))
1327                 return -EFAULT;
1328
1329         data_start(i, &idx, start);
1330         /* some of this one + all after this one */
1331         npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1332         n = npages * PAGE_SIZE - *start;
1333         if (maxsize > n)
1334                 maxsize = n;
1335         else
1336                 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1337         p = get_pages_array(npages);
1338         if (!p)
1339                 return -ENOMEM;
1340         n = __pipe_get_pages(i, maxsize, p, idx, start);
1341         if (n > 0)
1342                 *pages = p;
1343         else
1344                 kvfree(p);
1345         return n;
1346 }
1347
1348 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1349                    struct page ***pages, size_t maxsize,
1350                    size_t *start)
1351 {
1352         struct page **p;
1353
1354         if (maxsize > i->count)
1355                 maxsize = i->count;
1356
1357         if (unlikely(iov_iter_is_pipe(i)))
1358                 return pipe_get_pages_alloc(i, pages, maxsize, start);
1359         if (unlikely(iov_iter_is_discard(i)))
1360                 return -EFAULT;
1361
1362         iterate_all_kinds(i, maxsize, v, ({
1363                 unsigned long addr = (unsigned long)v.iov_base;
1364                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1365                 int n;
1366                 int res;
1367
1368                 addr &= ~(PAGE_SIZE - 1);
1369                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1370                 p = get_pages_array(n);
1371                 if (!p)
1372                         return -ENOMEM;
1373                 res = get_user_pages_fast(addr, n,
1374                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1375                 if (unlikely(res <= 0)) {
1376                         kvfree(p);
1377                         *pages = NULL;
1378                         return res;
1379                 }
1380                 *pages = p;
1381                 return (res == n ? len : res * PAGE_SIZE) - *start;
1382         0;}),({
1383                 /* can't be more than PAGE_SIZE */
1384                 *start = v.bv_offset;
1385                 *pages = p = get_pages_array(1);
1386                 if (!p)
1387                         return -ENOMEM;
1388                 get_page(*p = v.bv_page);
1389                 return v.bv_len;
1390         }),({
1391                 return -EFAULT;
1392         })
1393         )
1394         return 0;
1395 }
1396 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1397
1398 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1399                                struct iov_iter *i)
1400 {
1401         char *to = addr;
1402         __wsum sum, next;
1403         size_t off = 0;
1404         sum = *csum;
1405         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1406                 WARN_ON(1);
1407                 return 0;
1408         }
1409         iterate_and_advance(i, bytes, v, ({
1410                 int err = 0;
1411                 next = csum_and_copy_from_user(v.iov_base,
1412                                                (to += v.iov_len) - v.iov_len,
1413                                                v.iov_len, 0, &err);
1414                 if (!err) {
1415                         sum = csum_block_add(sum, next, off);
1416                         off += v.iov_len;
1417                 }
1418                 err ? v.iov_len : 0;
1419         }), ({
1420                 char *p = kmap_atomic(v.bv_page);
1421                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1422                                       p + v.bv_offset, v.bv_len,
1423                                       sum, off);
1424                 kunmap_atomic(p);
1425                 off += v.bv_len;
1426         }),({
1427                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1428                                       v.iov_base, v.iov_len,
1429                                       sum, off);
1430                 off += v.iov_len;
1431         })
1432         )
1433         *csum = sum;
1434         return bytes;
1435 }
1436 EXPORT_SYMBOL(csum_and_copy_from_iter);
1437
1438 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1439                                struct iov_iter *i)
1440 {
1441         char *to = addr;
1442         __wsum sum, next;
1443         size_t off = 0;
1444         sum = *csum;
1445         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1446                 WARN_ON(1);
1447                 return false;
1448         }
1449         if (unlikely(i->count < bytes))
1450                 return false;
1451         iterate_all_kinds(i, bytes, v, ({
1452                 int err = 0;
1453                 next = csum_and_copy_from_user(v.iov_base,
1454                                                (to += v.iov_len) - v.iov_len,
1455                                                v.iov_len, 0, &err);
1456                 if (err)
1457                         return false;
1458                 sum = csum_block_add(sum, next, off);
1459                 off += v.iov_len;
1460                 0;
1461         }), ({
1462                 char *p = kmap_atomic(v.bv_page);
1463                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1464                                       p + v.bv_offset, v.bv_len,
1465                                       sum, off);
1466                 kunmap_atomic(p);
1467                 off += v.bv_len;
1468         }),({
1469                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1470                                       v.iov_base, v.iov_len,
1471                                       sum, off);
1472                 off += v.iov_len;
1473         })
1474         )
1475         *csum = sum;
1476         iov_iter_advance(i, bytes);
1477         return true;
1478 }
1479 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
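
/*
 * Editor's note: illustrative sketch, not part of the original file (kept
 * under #if 0).  The *_full variant above is all-or-nothing: it only
 * advances the iterator once every byte has been copied and checksummed,
 * which suits pulling fixed-size headers.  struct demo_hdr and
 * demo_pull_hdr() are invented for the example.
 */
#if 0
struct demo_hdr {
	__be32 magic;
	__be32 len;
};

static bool demo_pull_hdr(struct demo_hdr *hdr, __wsum *csum,
			  struct iov_iter *from)
{
	/* On failure the iterator position is left untouched. */
	return csum_and_copy_from_iter_full(hdr, sizeof(*hdr), csum, from);
}
#endif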
1480
1481 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1482                              struct iov_iter *i)
1483 {
1484         struct csum_state *csstate = _csstate;
1485         const char *from = addr;
1486         __wsum sum, next;
1487         size_t off;
1488
1489         if (unlikely(iov_iter_is_pipe(i)))
1490                 return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1491
1492         sum = csstate->csum;
1493         off = csstate->off;
1494         if (unlikely(iov_iter_is_discard(i))) {
1495                 WARN_ON(1);     /* for now */
1496                 return 0;
1497         }
1498         iterate_and_advance(i, bytes, v, ({
1499                 int err = 0;
1500                 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1501                                              v.iov_base,
1502                                              v.iov_len, 0, &err);
1503                 if (!err) {
1504                         sum = csum_block_add(sum, next, off);
1505                         off += v.iov_len;
1506                 }
1507                 err ? v.iov_len : 0;
1508         }), ({
1509                 char *p = kmap_atomic(v.bv_page);
1510                 sum = csum_and_memcpy(p + v.bv_offset,
1511                                       (from += v.bv_len) - v.bv_len,
1512                                       v.bv_len, sum, off);
1513                 kunmap_atomic(p);
1514                 off += v.bv_len;
1515         }),({
1516                 sum = csum_and_memcpy(v.iov_base,
1517                                      (from += v.iov_len) - v.iov_len,
1518                                      v.iov_len, sum, off);
1519                 off += v.iov_len;
1520         })
1521         )
1522         csstate->csum = sum;
1523         csstate->off = off;
1524         return bytes;
1525 }
1526 EXPORT_SYMBOL(csum_and_copy_to_iter);
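
/*
 * Editor's note: illustrative sketch, not part of the original file (kept
 * under #if 0).  The struct csum_state cookie lets a message be copied to
 * the iterator in several pieces: both the running checksum and the byte
 * offset survive between calls, so the result is the same as one big copy.
 * The two placeholder buffers and the zero seed are assumptions; real
 * callers usually seed .csum with an existing partial checksum.
 */
#if 0
static size_t demo_copy_two_chunks(const void *a, size_t alen,
				   const void *b, size_t blen,
				   struct iov_iter *to)
{
	struct csum_state css = { .csum = 0, .off = 0 };
	size_t done = 0;

	done += csum_and_copy_to_iter(a, alen, &css, to);
	done += csum_and_copy_to_iter(b, blen, &css, to);
	/* css.csum now covers both chunks, as if they were copied at once. */
	return done;
}
#endif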
1527
1528 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1529                 struct iov_iter *i)
1530 {
1531 #ifdef CONFIG_CRYPTO
1532         struct ahash_request *hash = hashp;
1533         struct scatterlist sg;
1534         size_t copied;
1535
1536         copied = copy_to_iter(addr, bytes, i);
1537         sg_init_one(&sg, addr, copied);
1538         ahash_request_set_crypt(hash, &sg, NULL, copied);
1539         crypto_ahash_update(hash);
1540         return copied;
1541 #else
1542         return 0;
1543 #endif
1544 }
1545 EXPORT_SYMBOL(hash_and_copy_to_iter);
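
/*
 * Editor's note: illustrative sketch, not part of the original file (kept
 * under #if 0).  hash_and_copy_to_iter() only feeds the copied bytes into an
 * already-initialised ahash request via crypto_ahash_update(); allocating
 * and finalising the transform is the caller's job.  The "sha256" algorithm,
 * the demo_* names and the assumption that the <crypto/hash.h> declarations
 * are in scope are all illustration-only choices.
 */
#if 0
static int demo_hash_while_copying(const void *buf, size_t len,
				   struct iov_iter *to, u8 *digest)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	int err;

	tfm = crypto_alloc_ahash("sha256", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}
	ahash_request_set_callback(req, 0, NULL, NULL);

	err = crypto_ahash_init(req);
	if (!err) {
		hash_and_copy_to_iter(buf, len, req, to);
		ahash_request_set_crypt(req, NULL, digest, 0);
		err = crypto_ahash_final(req);
	}

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}
#endif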
1546
1547 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1548 {
1549         size_t size = i->count;
1550         int npages = 0;
1551
1552         if (!size)
1553                 return 0;
1554         if (unlikely(iov_iter_is_discard(i)))
1555                 return 0;
1556
1557         if (unlikely(iov_iter_is_pipe(i))) {
1558                 struct pipe_inode_info *pipe = i->pipe;
1559                 size_t off;
1560                 int idx;
1561
1562                 if (!sanity(i))
1563                         return 0;
1564
1565                 data_start(i, &idx, &off);
1566                 /* some of this one + all after this one */
1567                 npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1568                 if (npages >= maxpages)
1569                         return maxpages;
1570         } else iterate_all_kinds(i, size, v, ({
1571                 unsigned long p = (unsigned long)v.iov_base;
1572                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1573                         - p / PAGE_SIZE;
1574                 if (npages >= maxpages)
1575                         return maxpages;
1576         0;}),({
1577                 npages++;
1578                 if (npages >= maxpages)
1579                         return maxpages;
1580         }),({
1581                 unsigned long p = (unsigned long)v.iov_base;
1582                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1583                         - p / PAGE_SIZE;
1584                 if (npages >= maxpages)
1585                         return maxpages;
1586         })
1587         )
1588         return npages;
1589 }
1590 EXPORT_SYMBOL(iov_iter_npages);
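
/*
 * Editor's note: illustrative sketch, not part of the original file (kept
 * under #if 0).  The per-segment arithmetic above counts how many pages a
 * virtually contiguous range touches.  With 4 KiB pages, a 5-byte segment
 * at offset 0xffe spans two pages:
 *	DIV_ROUND_UP(0xffe + 5, 0x1000) - 0xffe / 0x1000 = 2 - 0 = 2.
 */
#if 0
static int demo_pages_spanned(unsigned long addr, size_t len)
{
	return DIV_ROUND_UP(addr + len, PAGE_SIZE) - addr / PAGE_SIZE;
}
#endif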
1591
1592 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1593 {
1594         *new = *old;
1595         if (unlikely(iov_iter_is_pipe(new))) {
1596                 WARN_ON(1);
1597                 return NULL;
1598         }
1599         if (unlikely(iov_iter_is_discard(new)))
1600                 return NULL;
1601         if (iov_iter_is_bvec(new))
1602                 return new->bvec = kmemdup(new->bvec,
1603                                     new->nr_segs * sizeof(struct bio_vec),
1604                                     flags);
1605         else
1606                 /* iovec and kvec have identical layout */
1607                 return new->iov = kmemdup(new->iov,
1608                                    new->nr_segs * sizeof(struct iovec),
1609                                    flags);
1610 }
1611 EXPORT_SYMBOL(dup_iter);
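
/*
 * Editor's note: illustrative sketch, not part of the original file (kept
 * under #if 0).  dup_iter() copies the iterator and kmemdup()s its segment
 * array, so the clone stays usable after the caller's (possibly on-stack)
 * array is gone; the returned pointer is what must eventually be kfree()d.
 * A NULL return means the allocation failed, or that the iterator kind
 * (pipe/discard) has no segment array.  demo_clone_iter() is invented here.
 */
#if 0
static int demo_clone_iter(struct iov_iter *dst, struct iov_iter *src)
{
	const void *segs = dup_iter(dst, src, GFP_KERNEL);

	if (!segs)
		return -ENOMEM;

	/* ... use *dst, possibly long after *src's segments are freed ... */

	kfree(segs);
	return 0;
}
#endif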
1612
1613 /**
1614  * import_iovec() - Copy an array of &struct iovec from userspace
1615  *     into the kernel, check that it is valid, and initialize a new
1616  *     &struct iov_iter iterator to access it.
1617  *
1618  * @type: One of %READ or %WRITE.
1619  * @uvector: Pointer to the userspace array.
1620  * @nr_segs: Number of elements in userspace array.
1621  * @fast_segs: Number of elements in @iov.
1622  * @iov: (input and output parameter) Pointer to pointer to (usually small
1623  *     on-stack) kernel array.
1624  * @i: Pointer to iterator that will be initialized on success.
1625  *
1626  * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1627  * then this function places %NULL in *@iov on return. Otherwise, a new
1628  * array will be allocated and the result placed in *@iov. This means that
1629  * the caller may call kfree() on *@iov regardless of whether the small
1630  * on-stack array was used or not (and regardless of whether this function
1631  * returns an error or not).
1632  *
1633  * Return: Negative error code on error, bytes imported on success
1634  */
1635 ssize_t import_iovec(int type, const struct iovec __user * uvector,
1636                  unsigned nr_segs, unsigned fast_segs,
1637                  struct iovec **iov, struct iov_iter *i)
1638 {
1639         ssize_t n;
1640         struct iovec *p;
1641         n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1642                                   *iov, &p);
1643         if (n < 0) {
1644                 if (p != *iov)
1645                         kfree(p);
1646                 *iov = NULL;
1647                 return n;
1648         }
1649         iov_iter_init(i, type, p, nr_segs, n);
1650         *iov = p == *iov ? NULL : p;
1651         return n;
1652 }
1653 EXPORT_SYMBOL(import_iovec);
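
/*
 * Editor's note: illustrative sketch, not part of the original file (kept
 * under #if 0).  It shows the canonical import_iovec() calling convention
 * documented above: a small on-stack array for the common case, a pointer
 * that import_iovec() may redirect to a heap allocation, and an
 * unconditional kfree() afterwards.  demo_readv_like() and the elided I/O
 * step are assumptions for the example.
 */
#if 0
static ssize_t demo_readv_like(struct file *file,
			       const struct iovec __user *uvec,
			       unsigned long nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;	/* *iov already freed and NULLed on error */

	/* ... hand &iter to the actual I/O path here ... */

	kfree(iov);	/* safe: NULL when the on-stack array was used */
	return ret;
}
#endif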
1654
1655 #ifdef CONFIG_COMPAT
1656 #include <linux/compat.h>
1657
1658 ssize_t compat_import_iovec(int type,
1659                 const struct compat_iovec __user * uvector,
1660                 unsigned nr_segs, unsigned fast_segs,
1661                 struct iovec **iov, struct iov_iter *i)
1662 {
1663         ssize_t n;
1664         struct iovec *p;
1665         n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1666                                   *iov, &p);
1667         if (n < 0) {
1668                 if (p != *iov)
1669                         kfree(p);
1670                 *iov = NULL;
1671                 return n;
1672         }
1673         iov_iter_init(i, type, p, nr_segs, n);
1674         *iov = p == *iov ? NULL : p;
1675         return n;
1676 }
1677 #endif
1678
1679 int import_single_range(int rw, void __user *buf, size_t len,
1680                  struct iovec *iov, struct iov_iter *i)
1681 {
1682         if (len > MAX_RW_COUNT)
1683                 len = MAX_RW_COUNT;
1684         if (unlikely(!access_ok(buf, len)))
1685                 return -EFAULT;
1686
1687         iov->iov_base = buf;
1688         iov->iov_len = len;
1689         iov_iter_init(i, rw, iov, 1, len);
1690         return 0;
1691 }
1692 EXPORT_SYMBOL(import_single_range);
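
/*
 * Editor's note: illustrative sketch, not part of the original file (kept
 * under #if 0).  import_single_range() is the single-buffer analogue of
 * import_iovec(), used by plain read()/write()-style paths; the iovec must
 * outlive the iterator, so it typically lives on the caller's stack.  Note
 * that the length is silently clamped to MAX_RW_COUNT.  demo_write_like()
 * is invented for the example.
 */
#if 0
static ssize_t demo_write_like(struct file *file,
			       const char __user *buf, size_t len)
{
	struct iovec iov;
	struct iov_iter iter;
	int ret;

	ret = import_single_range(WRITE, (void __user *)buf, len, &iov, &iter);
	if (unlikely(ret))
		return ret;

	/* ... feed &iter to the write path ... */

	return iov_iter_count(&iter);	/* <= len, after clamping */
}
#endif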
1693
1694 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1695                             int (*f)(struct kvec *vec, void *context),
1696                             void *context)
1697 {
1698         struct kvec w;
1699         int err = -EINVAL;
1700         if (!bytes)
1701                 return 0;
1702
1703         iterate_all_kinds(i, bytes, v, -EINVAL, ({
1704                 w.iov_base = kmap(v.bv_page) + v.bv_offset;
1705                 w.iov_len = v.bv_len;
1706                 err = f(&w, context);
1707                 kunmap(v.bv_page);
1708                 err;}), ({
1709                 w = v;
1710                 err = f(&w, context);})
1711         )
1712         return err;
1713 }
1714 EXPORT_SYMBOL(iov_iter_for_each_range);
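
/*
 * Editor's note: illustrative sketch, not part of the original file (kept
 * under #if 0).  The callback passed to iov_iter_for_each_range() sees each
 * segment as a kernel-addressable kvec (bvec pages are kmap()ed for it),
 * while user-backed ITER_IOVEC iterators are rejected with -EINVAL.  A
 * nonzero return from the callback does not stop the walk; the caller sees
 * the value returned for the last segment processed.  The zeroing helpers
 * below are invented for the example.
 */
#if 0
static int demo_zero_range(struct kvec *vec, void *context)
{
	memset(vec->iov_base, 0, vec->iov_len);
	return 0;
}

static int demo_zero_first_bytes(struct iov_iter *i, size_t bytes)
{
	return iov_iter_for_each_range(i, bytes, demo_zero_range, NULL);
}
#endif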