// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"

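/*
 * Propagate the backing block device's queue limits into the Identify
 * Namespace data structure. All values below are 0's based counts of
 * logical blocks, e.g. a 4096 B physical / 512 B logical block device has
 * 8 logical blocks per physical block and therefore reports the value 7.
 */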
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
        /* Logical blocks per physical block, 0's based. */
        const __le16 lpp0b = to0based(bdev_physical_block_size(bdev) /
                                      bdev_logical_block_size(bdev));

        /*
         * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
         * NAWUPF, and NACWU are defined for this namespace and should be
         * used by the host for this namespace instead of the AWUN, AWUPF,
         * and ACWU fields in the Identify Controller data structure. If
         * any of these fields are zero that means that the corresponding
         * field from the identify controller data structure should be used.
         */
        id->nsfeat |= 1 << 1;
        id->nawun = lpp0b;
        id->nawupf = lpp0b;
        id->nacwu = lpp0b;

        /*
         * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
         * NOWS are defined for this namespace and should be used by
         * the host for I/O optimization.
         */
        id->nsfeat |= 1 << 4;
        /* NPWG = Namespace Preferred Write Granularity. 0's based */
        id->npwg = lpp0b;
        /* NPWA = Namespace Preferred Write Alignment. 0's based */
        id->npwa = id->npwg;
        /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
        id->npdg = to0based(bdev_discard_granularity(bdev) /
                            bdev_logical_block_size(bdev));
        /* NPDA = Namespace Preferred Deallocate Alignment */
        id->npda = id->npdg;
        /* NOWS = Namespace Optimal Write Size */
        id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev));
}

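/* Undo nvmet_bdev_ns_enable(): release the block device handle, if any. */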
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
        if (ns->bdev_handle) {
                bdev_release(ns->bdev_handle);
                ns->bdev = NULL;
                ns->bdev_handle = NULL;
        }
}

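/*
 * Mirror the backing device's T10-PI integrity profile into the namespace
 * so the target can advertise end-to-end protection information. Profiles
 * other than Type 1 / Type 3 CRC are left disabled.
 */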
static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
        struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

        if (bi) {
                ns->metadata_size = bi->tuple_size;
                if (bi->profile == &t10_pi_type1_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE1;
                else if (bi->profile == &t10_pi_type3_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE3;
                else
                        /* Unsupported metadata type */
                        ns->metadata_size = 0;
        }
}

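/*
 * Open the backing block device for a namespace and derive its capacity,
 * block size, protection information and (if applicable) zoned properties.
 */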
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
        int ret;

        /*
         * When the buffered_io namespace attribute is enabled, the user wants
         * this block device to be accessed as a file so that it can take
         * advantage of the page cache. Return -ENOTBLK so the caller falls
         * back to the file-backed namespace handler.
         */
        if (ns->buffered_io)
                return -ENOTBLK;

        ns->bdev_handle = bdev_open_by_path(ns->device_path,
                                BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL);
        if (IS_ERR(ns->bdev_handle)) {
                ret = PTR_ERR(ns->bdev_handle);
                if (ret != -ENOTBLK) {
                        pr_err("failed to open block device %s: (%d)\n",
                                        ns->device_path, ret);
                }
                ns->bdev_handle = NULL;
                return ret;
        }
        ns->bdev = ns->bdev_handle->bdev;
        ns->size = bdev_nr_bytes(ns->bdev);
        ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

        ns->pi_type = 0;
        ns->metadata_size = 0;
        if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
                nvmet_bdev_ns_enable_integrity(ns);

        if (bdev_is_zoned(ns->bdev)) {
                if (!nvmet_bdev_zns_enable(ns)) {
                        nvmet_bdev_ns_disable(ns);
                        return -EINVAL;
                }
                ns->csi = NVME_CSI_ZNS;
        }

        return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
        ns->size = bdev_nr_bytes(ns->bdev);
}

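/*
 * Map a block layer completion status onto an NVMe status code and record
 * the error location and LBA for error log reporting.
 */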
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
        u16 status = NVME_SC_SUCCESS;

        if (likely(blk_sts == BLK_STS_OK))
                return status;
        /*
         * Right now there exists an M : 1 mapping from block layer errors
         * to NVMe status codes (see nvme_error_status()). For consistency,
         * when we reverse-map we pick the most appropriate NVMe status code
         * from the group of NVMe status codes used in nvme_error_status().
         */
        switch (blk_sts) {
        case BLK_STS_NOSPC:
                status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, length);
                break;
        case BLK_STS_TARGET:
                status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, slba);
                break;
        case BLK_STS_NOTSUPP:
                req->error_loc = offsetof(struct nvme_common_command, opcode);
                switch (req->cmd->common.opcode) {
                case nvme_cmd_dsm:
                case nvme_cmd_write_zeroes:
                        status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
                        break;
                default:
                        status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                }
                break;
        case BLK_STS_MEDIUM:
                status = NVME_SC_ACCESS_DENIED;
                req->error_loc = offsetof(struct nvme_rw_command, nsid);
                break;
        case BLK_STS_IOERR:
        default:
                status = NVME_SC_INTERNAL | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_common_command, opcode);
        }

        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->error_slba = le64_to_cpu(req->cmd->rw.slba);
                break;
        case nvme_cmd_write_zeroes:
                req->error_slba =
                        le64_to_cpu(req->cmd->write_zeroes.slba);
                break;
        default:
                req->error_slba = 0;
        }
        return status;
}

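/*
 * Shared bio completion handler: translate the bio status and complete
 * the request.
 */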
static void nvmet_bio_done(struct bio *bio)
{
        struct nvmet_req *req = bio->bi_private;

        nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
        nvmet_req_bio_put(req, bio);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
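/*
 * Attach a bio_integrity_payload to @bio and fill it with protection
 * information taken from the request's metadata scatterlist.
 */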
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        struct blk_integrity *bi;
        struct bio_integrity_payload *bip;
        int rc;
        size_t resid, len;

        bi = bdev_get_integrity(req->ns->bdev);
        if (unlikely(!bi)) {
                pr_err("Unable to locate bio_integrity\n");
                return -ENODEV;
        }

        bip = bio_integrity_alloc(bio, GFP_NOIO,
                                        bio_max_segs(req->metadata_sg_cnt));
        if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
                return PTR_ERR(bip);
        }

        /* virtual start sector must be in integrity interval units */
        bip_set_seed(bip, bio->bi_iter.bi_sector >>
                     (bi->interval_exp - SECTOR_SHIFT));

        resid = bio_integrity_bytes(bi, bio_sectors(bio));
        while (resid > 0 && sg_miter_next(miter)) {
                len = min_t(size_t, miter->length, resid);
                rc = bio_integrity_add_page(bio, miter->page, len,
                                            offset_in_page(miter->addr));
                if (unlikely(rc != len)) {
                        pr_err("bio_integrity_add_page() failed; %d\n", rc);
                        sg_miter_stop(miter);
                        return -ENOMEM;
                }

                resid -= len;
                if (len < miter->length)
                        miter->consumed -= miter->length - len;
        }
        sg_miter_stop(miter);

        return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

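/*
 * Map the request's data scatterlist onto one or more chained bios and
 * submit them under a plug; protection information, if any, is attached
 * to each bio before it is submitted.
 */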
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
        unsigned int sg_cnt = req->sg_cnt;
        struct bio *bio;
        struct scatterlist *sg;
        struct blk_plug plug;
        sector_t sector;
        blk_opf_t opf;
        int i, rc;
        struct sg_mapping_iter prot_miter;
        unsigned int iter_flags;
        unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

        if (!nvmet_check_transfer_len(req, total_len))
                return;

        if (!req->sg_cnt) {
                nvmet_req_complete(req, 0);
                return;
        }

        if (req->cmd->rw.opcode == nvme_cmd_write) {
                opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
                if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
                        opf |= REQ_FUA;
                iter_flags = SG_MITER_TO_SG;
        } else {
                opf = REQ_OP_READ;
                iter_flags = SG_MITER_FROM_SG;
        }

        if (is_pci_p2pdma_page(sg_page(req->sg)))
                opf |= REQ_NOMERGE;

        sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

        if (nvmet_use_inline_bvec(req)) {
                bio = &req->b.inline_bio;
                bio_init(bio, req->ns->bdev, req->inline_bvec,
                         ARRAY_SIZE(req->inline_bvec), opf);
        } else {
                bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
                                GFP_KERNEL);
        }
        bio->bi_iter.bi_sector = sector;
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        blk_start_plug(&plug);
        if (req->metadata_len)
                sg_miter_start(&prot_miter, req->metadata_sg,
                               req->metadata_sg_cnt, iter_flags);

        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
                                != sg->length) {
                        struct bio *prev = bio;

                        if (req->metadata_len) {
                                rc = nvmet_bdev_alloc_bip(req, bio,
                                                          &prot_miter);
                                if (unlikely(rc)) {
                                        bio_io_error(bio);
                                        return;
                                }
                        }

                        bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
                                        opf, GFP_KERNEL);
                        bio->bi_iter.bi_sector = sector;

                        bio_chain(bio, prev);
                        submit_bio(prev);
                }

                sector += sg->length >> 9;
                sg_cnt--;
        }

        if (req->metadata_len) {
                rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
                if (unlikely(rc)) {
                        bio_io_error(bio);
                        return;
                }
        }

        submit_bio(bio);
        blk_finish_plug(&plug);
}

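/*
 * Handle the NVMe Flush command: a no-op for devices without a volatile
 * write cache, otherwise submit an empty preflush bio.
 */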
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
        struct bio *bio = &req->b.inline_bio;

        if (!bdev_write_cache(req->ns->bdev)) {
                nvmet_req_complete(req, NVME_SC_SUCCESS);
                return;
        }

        if (!nvmet_check_transfer_len(req, 0))
                return;

        bio_init(bio, req->ns->bdev, req->inline_bvec,
                 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        submit_bio(bio);
}

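/* Synchronously flush the backing device and return an NVMe status code. */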
u16 nvmet_bdev_flush(struct nvmet_req *req)
{
        if (!bdev_write_cache(req->ns->bdev))
                return 0;

        if (blkdev_issue_flush(req->ns->bdev))
                return NVME_SC_INTERNAL | NVME_SC_DNR;
        return 0;
}

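/*
 * Issue a discard for a single DSM range; bios are accumulated in @bio so
 * the caller can chain multiple ranges into one submission.
 */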
static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
                struct nvme_dsm_range *range, struct bio **bio)
{
        struct nvmet_ns *ns = req->ns;
        int ret;

        ret = __blkdev_issue_discard(ns->bdev,
                        nvmet_lba_to_sect(ns, range->slba),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
                        GFP_KERNEL, bio);
        if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
                return errno_to_nvme_status(req, ret);
        }
        return NVME_SC_SUCCESS;
}

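/*
 * Walk the DSM range list from the command payload and discard each range.
 * Completion happens from the last bio, or inline if no bio was allocated
 * (e.g. when the device does not support discard).
 */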
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
        struct nvme_dsm_range range;
        struct bio *bio = NULL;
        int i;
        u16 status;

        for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
                status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                                sizeof(range));
                if (status)
                        break;

                status = nvmet_bdev_discard_range(req, &range, &bio);
                if (status)
                        break;
        }

        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                if (status)
                        bio_io_error(bio);
                else
                        submit_bio(bio);
        } else {
                nvmet_req_complete(req, status);
        }
}

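/* Dataset Management: only the Deallocate (AD) attribute is implemented. */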
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
        if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
                return;

        switch (le32_to_cpu(req->cmd->dsm.attributes)) {
        case NVME_DSMGMT_AD:
                nvmet_bdev_execute_discard(req);
                return;
        case NVME_DSMGMT_IDR:
        case NVME_DSMGMT_IDW:
        default:
                /* Not supported yet */
                nvmet_req_complete(req, 0);
                return;
        }
}

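/* Handle Write Zeroes by zeroing the requested LBA range on the device. */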
static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
        struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
        struct bio *bio = NULL;
        sector_t sector;
        sector_t nr_sector;
        int ret;

        if (!nvmet_check_transfer_len(req, 0))
                return;

        sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
        nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
                (req->ns->blksize_shift - 9));

        ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
                        GFP_KERNEL, &bio, 0);
        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                submit_bio(bio);
        } else {
                nvmet_req_complete(req, errno_to_nvme_status(req, ret));
        }
}

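/*
 * Set up the ->execute handler for an I/O command targeting a block
 * device backed namespace.
 */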
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_bdev_execute_rw;
                if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
                        req->metadata_len = nvmet_rw_metadata_len(req);
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_bdev_execute_flush;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_bdev_execute_dsm;
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_bdev_execute_write_zeroes;
                return 0;
        default:
                return nvmet_report_invalid_opcode(req);
        }
}