1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Network filesystem high-level write support.
4 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
10 #include <linux/pagemap.h>
11 #include <linux/slab.h>
12 #include <linux/writeback.h>
13 #include <linux/pagevec.h>
/* NOTE(review): this excerpt is elided — the leading number on each line is
 * the original source line number and the gaps mark dropped lines (the
 * kerneldoc opener, allocation-failure handling, `break`s and the final
 * `return subreq;` are among the elided text).  Do not assume the visible
 * lines are contiguous. */
17 * netfs_create_write_request - Create a write operation.
18 * @wreq: The write request this is storing from.
19 * @dest: The destination type
20 * @start: Start of the region this write will modify
21 * @len: Length of the modification
22 * @worker: The worker function to handle the write(s)
24 * Allocate a write operation, set it up and add it to the list on a write
27 struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
28 enum netfs_io_source dest,
29 loff_t start, size_t len,
32 struct netfs_io_subrequest *subreq;
/* Allocate a subrequest tied to @wreq and arm its work item with the
 * caller-supplied worker function. */
34 subreq = netfs_alloc_subrequest(wreq);
36 INIT_WORK(&subreq->work, worker);
37 subreq->source = dest;
38 subreq->start = start;
/* Per-request monotonic index used in tracepoints to identify this subreq. */
40 subreq->debug_index = wreq->subreq_counter++;
/* Bump the matching statistics counter for the destination type. */
42 switch (subreq->source) {
43 case NETFS_UPLOAD_TO_SERVER:
44 netfs_stat(&netfs_n_wh_upload);
46 case NETFS_WRITE_TO_CACHE:
47 netfs_stat(&netfs_n_wh_write);
/* Carve this subrequest's window out of the request's iterator: copy the
 * parent iterator, skip to the subreq's offset within the request, then
 * clamp it to the subreq's length. */
53 subreq->io_iter = wreq->io_iter;
54 iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
55 iov_iter_truncate(&subreq->io_iter, subreq->len);
57 trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
58 refcount_read(&subreq->ref),
59 netfs_sreq_trace_new);
/* Account the subreq as outstanding (paired with the decrement in the
 * terminator path) and chain it onto the request's subrequest list. */
60 atomic_inc(&wreq->nr_outstanding);
61 list_add_tail(&subreq->rreq_link, &wreq->subrequests);
62 trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
67 EXPORT_SYMBOL(netfs_create_write_request);
70 * Process a completed write request once all the component operations have
/* Called when wreq->nr_outstanding drops to zero: collates results, reports
 * errors, unwinds DIO state and completes the request.  (Lines are elided in
 * this excerpt; loop bodies and branch endings are incomplete as shown.) */
73 static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
75 struct netfs_io_subrequest *subreq;
76 struct netfs_inode *ctx = netfs_inode(wreq->inode);
77 size_t transferred = 0;
79 _enter("R=%x[]", wreq->debug_id);
81 trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
/* First pass: total up how much was actually transferred, stopping the
 * accumulation at the first errored/empty or short subrequest. */
83 list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
84 if (subreq->error || subreq->transferred == 0)
86 transferred += subreq->transferred;
87 if (subreq->transferred < subreq->len)
90 wreq->transferred = transferred;
/* Second pass: propagate errors according to the destination type. */
92 list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
95 switch (subreq->source) {
96 case NETFS_UPLOAD_TO_SERVER:
97 /* Depending on the type of failure, this may prevent
98 * writeback completion unless we're in disconnected
102 wreq->error = subreq->error;
105 case NETFS_WRITE_TO_CACHE:
106 /* Failure doesn't prevent writeback completion unless
107 * we're in disconnected mode.
/* -ENOBUFS means the cache simply declined the data; any other cache
 * error invalidates the cached copy via the filesystem's op. */
109 if (subreq->error != -ENOBUFS)
110 ctx->ops->invalidate_cache(wreq);
/* For direct-I/O writes, drop any pagecache pages covering the written
 * range so subsequent buffered reads don't see stale data. */
123 if (wreq->origin == NETFS_DIO_WRITE &&
124 wreq->mapping->nrpages) {
125 pgoff_t first = wreq->start >> PAGE_SHIFT;
126 pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
127 invalidate_inode_pages2_range(wreq->mapping, first, last);
/* End the inode's DIO accounting started in netfs_begin_write(). */
130 if (wreq->origin == NETFS_DIO_WRITE)
131 inode_dio_end(wreq->inode);
/* Mark the request complete and wake anyone waiting in
 * netfs_begin_write()/netfs_end_writethrough(). */
134 trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
135 clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
136 wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
/* Complete the async iocb, if any: advance the file position and invoke
 * the completion callback with either the error or the byte count.
 * NOTE(review): no visible NULL check on wreq->iocb here — presumably an
 * elided guard precedes this; confirm against the full source. */
139 wreq->iocb->ki_pos += transferred;
140 if (wreq->iocb->ki_complete)
141 wreq->iocb->ki_complete(
142 wreq->iocb, wreq->error ? wreq->error : transferred);
/* Tear down the subrequests and drop the ref carried by nr_outstanding. */
145 netfs_clear_subrequests(wreq, was_async);
146 netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
150 * Deal with the completion of writing the data to the cache.
/* Per-subrequest completion handler: records the outcome on the subreq,
 * then decrements wreq->nr_outstanding and, on reaching zero, collates the
 * whole request via netfs_write_terminated().  @transferred_or_error is a
 * byte count on success or a negative errno.  (Lines are elided in this
 * excerpt; label targets, `break`s and `goto`s between the visible sections
 * are missing from this view.) */
152 void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
155 struct netfs_io_subrequest *subreq = _op;
156 struct netfs_io_request *wreq = subreq->rreq;
159 _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
/* Statistics for the completed destination type. */
161 switch (subreq->source) {
162 case NETFS_UPLOAD_TO_SERVER:
163 netfs_stat(&netfs_n_wh_upload_done);
165 case NETFS_WRITE_TO_CACHE:
166 netfs_stat(&netfs_n_wh_write_done);
168 case NETFS_INVALID_WRITE:
/* Negative value => the operation failed; stash the error on the subreq. */
174 if (IS_ERR_VALUE(transferred_or_error)) {
175 subreq->error = transferred_or_error;
176 trace_netfs_failure(wreq, subreq, transferred_or_error,
/* Sanity-clamp: a backend must not report more bytes than remained. */
181 if (WARN(transferred_or_error > subreq->len - subreq->transferred,
182 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
183 wreq->debug_id, subreq->debug_index,
184 transferred_or_error, subreq->len, subreq->transferred))
185 transferred_or_error = subreq->len - subreq->transferred;
188 subreq->transferred += transferred_or_error;
/* Debug check: the subreq iterator should track the unwritten remainder. */
190 if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
191 pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
192 wreq->debug_id, subreq->debug_index,
193 iov_iter_count(&subreq->io_iter), subreq->len,
194 subreq->transferred, subreq->io_iter.iter_type);
196 if (subreq->transferred < subreq->len)
/* Full transfer: forget any earlier no-progress state. */
199 __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
201 trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
203 /* If we decrement nr_outstanding to 0, the ref belongs to us. */
204 u = atomic_dec_return(&wreq->nr_outstanding);
206 netfs_write_terminated(wreq, was_async);
208 wake_up_var(&wreq->nr_outstanding);
210 netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
/* Short-write handling: zero bytes twice in a row becomes -ENODATA;
 * otherwise note the short I/O on the subreq and the request. */
214 if (transferred_or_error == 0) {
215 if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
216 subreq->error = -ENODATA;
220 __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
223 __set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
224 set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
/* Failure handling: a cache write failure merely leaves the I/O
 * incomplete; a server upload failure fails the whole request. */
228 switch (subreq->source) {
229 case NETFS_WRITE_TO_CACHE:
230 netfs_stat(&netfs_n_wh_write_failed);
231 set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
233 case NETFS_UPLOAD_TO_SERVER:
234 netfs_stat(&netfs_n_wh_upload_failed);
235 set_bit(NETFS_RREQ_FAILED, &wreq->flags);
236 wreq->error = subreq->error;
243 EXPORT_SYMBOL(netfs_write_subrequest_terminated);
/* Issue the actual cache write for @subreq using the request's cache
 * resources; completion is reported asynchronously through
 * netfs_write_subrequest_terminated(). */
245 static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
247 struct netfs_io_request *wreq = subreq->rreq;
248 struct netfs_cache_resources *cres = &wreq->cache_resources;
250 trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
/* Hand the subreq's iterator to the cache backend's write op. */
252 cres->ops->write(cres, subreq->start, &subreq->io_iter,
253 netfs_write_subrequest_terminated, subreq);
/* Workqueue shim: recover the subrequest from its work item and run the
 * cache write op in process context. */
256 static void netfs_write_to_cache_op_worker(struct work_struct *work)
258 struct netfs_io_subrequest *subreq =
259 container_of(work, struct netfs_io_subrequest, work);
261 netfs_write_to_cache_op(subreq);
265 * netfs_queue_write_request - Queue a write request for attention
266 * @subreq: The write request to be queued
268 * Queue the specified write request for processing by a worker thread. We
269 * pass the caller's ref on the request to the worker thread.
271 void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
/* If the work item was already queued, queue_work() returns false and the
 * ref we were passed is surplus — drop it here. */
273 if (!queue_work(system_unbound_wq, &subreq->work))
274 netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
276 EXPORT_SYMBOL(netfs_queue_write_request);
279 * Set up a op for writing to the cache.
/* Ask fscache whether it will take this request's data and, if so, create
 * and dispatch a NETFS_WRITE_TO_CACHE subrequest covering the (possibly
 * backend-adjusted) region.  (Error paths between the visible calls are
 * elided in this excerpt.) */
281 static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
283 struct netfs_cache_resources *cres = &wreq->cache_resources;
284 struct netfs_io_subrequest *subreq;
285 struct netfs_inode *ctx = netfs_inode(wreq->inode);
286 struct fscache_cookie *cookie = netfs_i_cookie(ctx);
287 loff_t start = wreq->start;
288 size_t len = wreq->len;
/* No usable cookie => nothing to cache; clear the flag so completion
 * logic doesn't expect a cache write. */
291 if (!fscache_cookie_enabled(cookie)) {
292 clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
296 _debug("write to cache");
297 ret = fscache_begin_write_operation(cres, cookie);
/* Let the cache backend adjust @start/@len to its own granularity. */
301 ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
302 i_size_read(wreq->inode), true);
306 subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
307 netfs_write_to_cache_op_worker);
/* Issue the cache write directly rather than deferring to the worker. */
311 netfs_write_to_cache_op(subreq);
315 * Begin the process of writing out a chunk of data.
317 * We are given a write request that holds a series of dirty regions and
318 * (partially) covers a sequence of folios, all of which are present. The
319 * pages must have been marked as writeback as appropriate.
321 * We need to perform the following steps:
323 * (1) If encrypting, create an output buffer and encrypt each block of the
324 * data into it, otherwise the output buffer will point to the original
327 * (2) If the data is to be cached, set up a write op for the entire output
328 * buffer to the cache, if the cache wants to accept it.
330 * (3) If the data is to be uploaded (ie. not merely cached):
332 * (a) If the data is to be compressed, create a compression buffer and
333 * compress the data into it.
335 * (b) For each destination we want to upload to, set up write ops to write
336 * to that destination. We may need multiple writes if the data is not
337 * contiguous or the span exceeds wsize for a server.
/* Returns an int status; @may_wait selects whether to block until the
 * request completes.  (The return statements and some branch bodies are
 * elided in this excerpt.) */
339 int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
340 enum netfs_write_trace what)
342 struct netfs_inode *ctx = netfs_inode(wreq->inode);
344 _enter("R=%x %llx-%llx f=%lx",
345 wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
348 trace_netfs_write(wreq, what);
/* Reject degenerate requests: nothing to write is a caller bug. */
349 if (wreq->len == 0 || wreq->iter.count == 0) {
350 pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
/* Direct I/O must be accounted on the inode (ended in the terminator). */
354 if (wreq->origin == NETFS_DIO_WRITE)
355 inode_dio_begin(wreq->inode);
/* Working iterator that subrequests will carve windows from. */
357 wreq->io_iter = wreq->iter;
359 /* ->outstanding > 0 carries a ref */
360 netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
361 atomic_set(&wreq->nr_outstanding, 1);
363 /* Start the encryption/compression going. We can do that in the
364 * background whilst we generate a list of write ops that we want to
367 // TODO: Encrypt or compress the region as appropriate
369 /* We need to write all of the region to the cache */
370 if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
371 netfs_set_up_write_to_cache(wreq);
373 /* However, we don't necessarily write all of the region to the server.
374 * Caching of reads is being managed this way also.
376 if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
377 ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);
/* Drop the initial "1" from nr_outstanding; if no subrequests were
 * created (or all already finished) this completes the request. */
379 if (atomic_dec_and_test(&wreq->nr_outstanding))
380 netfs_write_terminated(wreq, false);
/* Synchronous path: sleep until NETFS_RREQ_IN_PROGRESS is cleared by
 * netfs_write_terminated().  Presumably guarded by @may_wait in the
 * elided text — confirm against the full source. */
385 wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
386 TASK_UNINTERRUPTIBLE);
391 * Begin a write operation for writing through the pagecache.
/* Allocate and initialise a write request covering @len bytes at the
 * iocb's position, sourcing data from the mapping's pagecache xarray.
 * Returns the request (error handling is elided in this excerpt). */
393 struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
395 struct netfs_io_request *wreq;
396 struct file *file = iocb->ki_filp;
398 wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
403 trace_netfs_write(wreq, netfs_write_trace_writethrough);
/* Writethrough always uploads; the iterator starts empty and grows as
 * netfs_advance_writethrough() accounts copied-in data. */
405 __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
406 iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
407 wreq->io_iter = wreq->iter;
409 /* ->outstanding > 0 carries a ref */
410 netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
411 atomic_set(&wreq->nr_outstanding, 1);
/* Push the not-yet-submitted span of a writethrough request to the server
 * via the filesystem's create_write_requests op.  @final indicates this is
 * the closing flush (partial-packet handling is elided in this excerpt). */
415 static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
417 struct netfs_inode *ictx = netfs_inode(wreq->inode);
418 unsigned long long start;
/* Nothing to do if this request isn't uploading to the server. */
421 if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
/* Unsubmitted region = everything accumulated beyond ->submitted. */
424 start = wreq->start + wreq->submitted;
425 len = wreq->iter.count - wreq->submitted;
427 len /= wreq->wsize; /* Round to number of maximum packets */
431 ictx->ops->create_write_requests(wreq, start, len);
432 wreq->submitted += len;
436 * Advance the state of the write operation used when writing through the
437 * pagecache. Data has been copied into the pagecache that we need to append
438 * to the request. If we've added more than wsize then we need to create a new
441 int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
443 _enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
444 wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);
/* Grow both iterators to cover the bytes just copied into the pagecache. */
446 wreq->iter.count += copied;
447 wreq->io_iter.count += copied;
/* At a page boundary with at least one full wsize packet pending, submit
 * the accumulated data now rather than waiting for the end. */
448 if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
449 netfs_submit_writethrough(wreq, false);
455 * End a write operation used when writing through the pagecache.
/* Flush any remaining unsubmitted data, drop the initial nr_outstanding
 * count, and either wait (sync iocb) or return -EIOCBQUEUED (async).
 * (The final return and some branch endings are elided in this excerpt.) */
457 int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
459 int ret = -EIOCBQUEUED;
461 _enter("ic=%zu sb=%zu ws=%u",
462 wreq->iter.count, wreq->submitted, wreq->wsize);
/* Submit whatever remains beyond ->submitted as the final chunk. */
464 if (wreq->submitted < wreq->io_iter.count)
465 netfs_submit_writethrough(wreq, true);
/* Drop the "1" set at begin; if all subrequests already finished, this
 * collates and completes the request now. */
467 if (atomic_dec_and_test(&wreq->nr_outstanding))
468 netfs_write_terminated(wreq, false);
/* Synchronous iocb: block until the terminator clears IN_PROGRESS. */
470 if (is_sync_kiocb(iocb)) {
471 wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
472 TASK_UNINTERRUPTIBLE);
476 netfs_put_request(wreq, false, netfs_rreq_trace_put_return);