GNU Linux-libre 5.10.217-gnu1
[releases.git] / drivers / nvme / target / passthru.c
// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target Passthrough command implementation.
 *
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 * Copyright (c) 2019-2020, Eideticom Inc.
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>

#include "../host/nvme.h"
#include "nvmet.h"

MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);

/*
 * xarray to maintain one passthru subsystem per nvme controller.
 */
static DEFINE_XARRAY(passthru_subsystems);

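/*
 * Fix up the Identify Controller data returned by the underlying passthru
 * controller so that it reflects the fabrics target: cntlid, version, mdts,
 * SGL support, inline data sizes and the target subsystem NQN.
 */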
static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
{
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
        struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ctrl *id;
        int max_hw_sectors;
        int page_shift;

        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id)
                return NVME_SC_INTERNAL;

        status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id));
        if (status)
                goto out_free;

        id->cntlid = cpu_to_le16(ctrl->cntlid);
        id->ver = cpu_to_le32(ctrl->subsys->ver);

        /*
         * The passthru NVMe driver may have a limit on the number of segments
         * which depends on the host's memory fragmentation. To solve this,
         * ensure mdts is limited to a number of pages equal to the number of
         * segments.
         */
        max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
                                      pctrl->max_hw_sectors);

        /*
         * nvmet_passthru_map_sg is limited to using a single bio, so limit
         * the mdts based on BIO_MAX_PAGES as well.
         */
        max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
                                      max_hw_sectors);

        page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;

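        /*
         * Illustrative example (values assumed, not taken from any specific
         * device): with max_segments = 127, a 4K PAGE_SIZE and the other
         * limits being larger, max_hw_sectors = 127 << 3 = 1016; with
         * MPSMIN = 0 (page_shift = 12) the line below gives
         * mdts = ilog2(1016) + 9 - 12 = 6, i.e. transfers are capped at
         * 2^6 * 4KiB = 256KiB.
         */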
        id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;

        id->acl = 3;
        /*
         * We export the aerl limit for the fabrics controller; update this
         * when passthru-based aerl support is added.
         */
        id->aerl = NVMET_ASYNC_EVENTS - 1;

        /* emulate kas as most PCIe ctrls don't have support for kas */
        id->kas = cpu_to_le16(NVMET_KAS);

        /* don't support host memory buffer */
        id->hmpre = 0;
        id->hmmin = 0;

        id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
        id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
        id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);

        /* don't support fused commands */
        id->fuses = 0;

        id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
        if (ctrl->ops->flags & NVMF_KEYED_SGLS)
                id->sgls |= cpu_to_le32(1 << 2);
        if (req->port->inline_data_size)
                id->sgls |= cpu_to_le32(1 << 20);

        /*
         * When the passthru controller is set up using the nvme-loop
         * transport it will export the passthru ctrl's subsysnqn (that of the
         * PCIe NVMe ctrl) and will then fail in nvme/host/core.c in the
         * nvme_init_subsystem()->nvme_active_ctrl() code path with a
         * duplicate ctrl subsysnqn. In order to prevent that, we mask the
         * passthru-ctrl subsysnqn with the target ctrl subsysnqn.
         */
        memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn));

        /* use fabric id-ctrl values */
        id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
                                req->port->inline_data_size) / 16);
        id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
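        /*
         * Illustrative example (inline_data_size depends on the port
         * configuration): with 8KiB of inline data, ioccsz = (64 + 8192) / 16
         * = 516 16-byte units; iorcsz is always
         * sizeof(struct nvme_completion) / 16 = 1.
         */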

        id->msdbd = ctrl->ops->msdbd;

        /* Support multipath connections with fabrics */
        id->cmic |= 1 << 1;

        /* Disable reservations, see nvmet_parse_passthru_io_cmd() */
        id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS);

        status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl));

out_free:
        kfree(id);
        return status;
}

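/*
 * Fix up the Identify Namespace data returned by the underlying controller:
 * clear any LBA formats that carry metadata and report no metadata
 * capabilities, since the passthru target does not support metadata yet.
 */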
static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req)
{
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ns *id;
        int i;

        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id)
                return NVME_SC_INTERNAL;

        status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns));
        if (status)
                goto out_free;

        for (i = 0; i < (id->nlbaf + 1); i++)
                if (id->lbaf[i].ms)
                        memset(&id->lbaf[i], 0, sizeof(id->lbaf[i]));

        id->flbas = id->flbas & ~(1 << 4);

        /*
         * Presently the NVMe-oF target code does not support sending
         * metadata, so we must disable it here. This should be updated
         * once the target starts supporting metadata.
         */
        id->mc = 0;

        status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

out_free:
        kfree(id);
        return status;
}

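/*
 * Workqueue context for passthru commands that need post-execution fixups
 * (the Identify overrides above) or that have non-trivial command effects,
 * so that nvme_execute_passthru_rq() runs synchronously here.
 */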
static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
{
        struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
        struct request *rq = req->p.rq;
        u16 status;

        nvme_execute_passthru_rq(rq);

        status = nvme_req(rq)->status;
        if (status == NVME_SC_SUCCESS &&
            req->cmd->common.opcode == nvme_admin_identify) {
                switch (req->cmd->identify.cns) {
                case NVME_ID_CNS_CTRL:
                        nvmet_passthru_override_id_ctrl(req);
                        break;
                case NVME_ID_CNS_NS:
                        nvmet_passthru_override_id_ns(req);
                        break;
                }
        }

        req->cqe->result = nvme_req(rq)->result;
        nvmet_req_complete(req, status);
        blk_mq_free_request(rq);
}

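/* Completion callback for requests issued via blk_execute_rq_nowait() */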
static void nvmet_passthru_req_done(struct request *rq,
                                    blk_status_t blk_status)
{
        struct nvmet_req *req = rq->end_io_data;

        req->cqe->result = nvme_req(rq)->result;
        nvmet_req_complete(req, nvme_req(rq)->status);
        blk_mq_free_request(rq);
}

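/*
 * Map the request's scatterlist into a single bio and attach it to the
 * passthru request. Limited to BIO_MAX_PAGES segments; see the matching mdts
 * override in nvmet_passthru_override_id_ctrl().
 */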
static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
{
        struct scatterlist *sg;
        int op_flags = 0;
        struct bio *bio;
        int i, ret;

        if (req->sg_cnt > BIO_MAX_PAGES)
                return -EINVAL;

        if (req->cmd->common.opcode == nvme_cmd_flush)
                op_flags = REQ_FUA;
        else if (nvme_is_write(req->cmd))
                op_flags = REQ_SYNC | REQ_IDLE;

        bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
        bio->bi_end_io = bio_put;
        bio->bi_opf = req_op(rq) | op_flags;

        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
                                    sg->offset) < sg->length) {
                        bio_put(bio);
                        return -EINVAL;
                }
        }

        ret = blk_rq_append_bio(rq, &bio);
        if (unlikely(ret)) {
                bio_put(bio);
                return ret;
        }

        return 0;
}

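/*
 * Build and issue the passthru request on the underlying controller. Admin
 * commands go to the admin queue; I/O commands are routed to the namespace
 * queue looked up by NSID. Commands with non-trivial effects are deferred to
 * a workqueue so nvme_passthru_end() can run; everything else is issued
 * asynchronously.
 */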
static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
{
        struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
        struct request_queue *q = ctrl->admin_q;
        struct nvme_ns *ns = NULL;
        struct request *rq = NULL;
        u32 effects;
        u16 status;
        int ret;

        if (likely(req->sq->qid != 0)) {
                u32 nsid = le32_to_cpu(req->cmd->common.nsid);

                ns = nvme_find_get_ns(ctrl, nsid);
                if (unlikely(!ns)) {
                        pr_err("failed to get passthru ns nsid:%u\n", nsid);
                        status = NVME_SC_INVALID_NS | NVME_SC_DNR;
                        goto out;
                }

                q = ns->queue;
        }

        rq = nvme_alloc_request(q, req->cmd, 0);
        if (IS_ERR(rq)) {
                status = NVME_SC_INTERNAL;
                goto out_put_ns;
        }

        if (req->sg_cnt) {
                ret = nvmet_passthru_map_sg(req, rq);
                if (unlikely(ret)) {
                        status = NVME_SC_INTERNAL;
                        goto out_put_req;
                }
        }

        /*
         * If a command needs post-execution fixups, or there are any
         * non-trivial effects, make sure to execute the command synchronously
         * in a workqueue so that nvme_passthru_end gets called.
         */
        effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
        if (req->p.use_workqueue ||
            (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) {
                INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
                req->p.rq = rq;
                schedule_work(&req->p.work);
        } else {
                rq->end_io_data = req;
                blk_execute_rq_nowait(rq->q, ns ? ns->disk : NULL, rq, 0,
                                      nvmet_passthru_req_done);
        }

        if (ns)
                nvme_put_ns(ns);

        return;

out_put_req:
        blk_mq_free_request(rq);
out_put_ns:
        if (ns)
                nvme_put_ns(ns);
out:
        nvmet_req_complete(req, status);
}

/*
 * We need to emulate Set Features (Host Behavior Support) to ensure that the
 * behaviour requested by the target's host matches the behaviour already
 * requested by the device's host, and to fail otherwise.
 */
static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
{
        struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
        struct nvme_feat_host_behavior *host;
        u16 status = NVME_SC_INTERNAL;
        int ret;

        host = kzalloc(sizeof(*host) * 2, GFP_KERNEL);
        if (!host)
                goto out_complete_req;

        ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
                                host, sizeof(*host), NULL);
        if (ret)
                goto out_free_host;

        status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host));
        if (status)
                goto out_free_host;

        if (memcmp(&host[0], &host[1], sizeof(host[0]))) {
                pr_warn("target host has requested different behaviour from the local host\n");
                status = NVME_SC_INTERNAL;
        }

out_free_host:
        kfree(host);
out_complete_req:
        nvmet_req_complete(req, status);
}

static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
{
        req->p.use_workqueue = false;
        req->execute = nvmet_passthru_execute_cmd;
        return NVME_SC_SUCCESS;
}

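/*
 * Parse an I/O command for a passthru subsystem: all opcodes are passed
 * through to the device except reservation commands, which are rejected
 * (see the comment in the switch below).
 */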
u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
{
        /* Reject any commands with non-sgl flags set (i.e. fused commands) */
        if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
                return NVME_SC_INVALID_FIELD;

        switch (req->cmd->common.opcode) {
        case nvme_cmd_resv_register:
        case nvme_cmd_resv_report:
        case nvme_cmd_resv_acquire:
        case nvme_cmd_resv_release:
                /*
                 * Reservations cannot be supported properly because the
                 * underlying device has no way of differentiating different
                 * hosts that connect via fabrics. This could potentially be
                 * emulated in the future if regular targets grow support for
                 * this feature.
                 */
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }

        return nvmet_setup_passthru_command(req);
}

/*
 * Only features that are emulated or specifically allowed in the list are
 * passed down to the controller. This function implements the allow list for
 * both get and set features.
 */
static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
{
        switch (le32_to_cpu(req->cmd->features.fid)) {
        case NVME_FEAT_ARBITRATION:
        case NVME_FEAT_POWER_MGMT:
        case NVME_FEAT_LBA_RANGE:
        case NVME_FEAT_TEMP_THRESH:
        case NVME_FEAT_ERR_RECOVERY:
        case NVME_FEAT_VOLATILE_WC:
        case NVME_FEAT_WRITE_ATOMIC:
        case NVME_FEAT_AUTO_PST:
        case NVME_FEAT_TIMESTAMP:
        case NVME_FEAT_HCTM:
        case NVME_FEAT_NOPSC:
        case NVME_FEAT_RRL:
        case NVME_FEAT_PLM_CONFIG:
        case NVME_FEAT_PLM_WINDOW:
        case NVME_FEAT_HOST_BEHAVIOR:
        case NVME_FEAT_SANITIZE:
        case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END:
                return nvmet_setup_passthru_command(req);

        case NVME_FEAT_ASYNC_EVENT:
                /* There is no support for forwarding ASYNC events */
        case NVME_FEAT_IRQ_COALESCE:
        case NVME_FEAT_IRQ_CONFIG:
                /* The IRQ settings will not apply to the target controller */
        case NVME_FEAT_HOST_MEM_BUF:
                /*
                 * Any HMB that's set will not be passed through and will
                 * not work as expected
                 */
        case NVME_FEAT_SW_PROGRESS:
                /*
                 * The Pre-Boot Software Load Count doesn't make much
                 * sense for a target to export
                 */
        case NVME_FEAT_RESV_MASK:
        case NVME_FEAT_RESV_PERSIST:
                /* No reservations, see nvmet_parse_passthru_io_cmd() */
        default:
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }
}

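/*
 * Parse an admin command for a passthru subsystem. Fabrics-specific commands
 * (async events, keep alive, queue and host-id features) are handled locally;
 * everything else on the allowlist is passed through to the device.
 */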
u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
{
        /* Reject any commands with non-sgl flags set (i.e. fused commands) */
        if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
                return NVME_SC_INVALID_FIELD;

        /*
         * Pass through all vendor specific commands
         */
        if (req->cmd->common.opcode >= nvme_admin_vendor_start)
                return nvmet_setup_passthru_command(req);

        switch (req->cmd->common.opcode) {
        case nvme_admin_async_event:
                req->execute = nvmet_execute_async_event;
                return NVME_SC_SUCCESS;
        case nvme_admin_keep_alive:
                /*
                 * Most PCIe ctrls don't support the keep alive cmd, so we
                 * route keep alive to the non-passthru mode. Change this code
                 * once PCIe ctrls with keep alive support become available.
                 */
                req->execute = nvmet_execute_keep_alive;
                return NVME_SC_SUCCESS;
        case nvme_admin_set_features:
                switch (le32_to_cpu(req->cmd->features.fid)) {
                case NVME_FEAT_ASYNC_EVENT:
                case NVME_FEAT_KATO:
                case NVME_FEAT_NUM_QUEUES:
                case NVME_FEAT_HOST_ID:
                        req->execute = nvmet_execute_set_features;
                        return NVME_SC_SUCCESS;
                case NVME_FEAT_HOST_BEHAVIOR:
                        req->execute = nvmet_passthru_set_host_behaviour;
                        return NVME_SC_SUCCESS;
                default:
                        return nvmet_passthru_get_set_features(req);
                }
                break;
        case nvme_admin_get_features:
                switch (le32_to_cpu(req->cmd->features.fid)) {
                case NVME_FEAT_ASYNC_EVENT:
                case NVME_FEAT_KATO:
                case NVME_FEAT_NUM_QUEUES:
                case NVME_FEAT_HOST_ID:
                        req->execute = nvmet_execute_get_features;
                        return NVME_SC_SUCCESS;
                default:
                        return nvmet_passthru_get_set_features(req);
                }
                break;
        case nvme_admin_identify:
                switch (req->cmd->identify.cns) {
                case NVME_ID_CNS_CTRL:
                        req->execute = nvmet_passthru_execute_cmd;
                        req->p.use_workqueue = true;
                        return NVME_SC_SUCCESS;
                case NVME_ID_CNS_CS_CTRL:
                        switch (req->cmd->identify.csi) {
                        case NVME_CSI_ZNS:
                                req->execute = nvmet_passthru_execute_cmd;
                                req->p.use_workqueue = true;
                                return NVME_SC_SUCCESS;
                        }
                        return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                case NVME_ID_CNS_NS:
                        req->execute = nvmet_passthru_execute_cmd;
                        req->p.use_workqueue = true;
                        return NVME_SC_SUCCESS;
                case NVME_ID_CNS_CS_NS:
                        switch (req->cmd->identify.csi) {
                        case NVME_CSI_ZNS:
                                req->execute = nvmet_passthru_execute_cmd;
                                req->p.use_workqueue = true;
                                return NVME_SC_SUCCESS;
                        }
                        return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                default:
                        return nvmet_setup_passthru_command(req);
                }
        case nvme_admin_get_log_page:
                return nvmet_setup_passthru_command(req);
        default:
                /* Reject commands not in the allowlist above */
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }
}

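/*
 * Bind a passthru subsystem to the NVMe controller named by
 * passthru_ctrl_path and register it in the passthru_subsystems xarray.
 * Typically driven from configfs; an illustrative (not normative) setup:
 *
 *   echo -n /dev/nvme0 > \
 *       /sys/kernel/config/nvmet/subsystems/<nqn>/passthru/device_path
 *   echo 1 > /sys/kernel/config/nvmet/subsystems/<nqn>/passthru/enable
 */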
int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
{
        struct nvme_ctrl *ctrl;
        struct file *file;
        int ret = -EINVAL;
        void *old;

        mutex_lock(&subsys->lock);
        if (!subsys->passthru_ctrl_path)
                goto out_unlock;
        if (subsys->passthru_ctrl)
                goto out_unlock;

        if (subsys->nr_namespaces) {
                pr_info("cannot enable both passthru and regular namespaces for a single subsystem\n");
                goto out_unlock;
        }

        file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
                goto out_unlock;
        }

        ctrl = nvme_ctrl_from_file(file);
        if (!ctrl) {
                pr_err("failed to open nvme controller %s\n",
                       subsys->passthru_ctrl_path);

                goto out_put_file;
        }

        old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
                         subsys, GFP_KERNEL);
        if (xa_is_err(old)) {
                ret = xa_err(old);
                goto out_put_file;
        }

        if (old)
                goto out_put_file;

        subsys->passthru_ctrl = ctrl;
        subsys->ver = ctrl->vs;

        if (subsys->ver < NVME_VS(1, 2, 1)) {
                pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n",
                        NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver),
                        NVME_TERTIARY(subsys->ver));
                subsys->ver = NVME_VS(1, 2, 1);
        }
        nvme_get_ctrl(ctrl);
        __module_get(subsys->passthru_ctrl->ops->module);
        ret = 0;

out_put_file:
        filp_close(file, NULL);
out_unlock:
        mutex_unlock(&subsys->lock);
        return ret;
}

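/*
 * Unbind the passthru controller from the subsystem and drop the module and
 * controller references taken in nvmet_passthru_ctrl_enable(). Caller must
 * hold subsys->lock.
 */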
static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
        if (subsys->passthru_ctrl) {
                xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
                module_put(subsys->passthru_ctrl->ops->module);
                nvme_put_ctrl(subsys->passthru_ctrl);
        }
        subsys->passthru_ctrl = NULL;
        subsys->ver = NVMET_DEFAULT_VS;
}

void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
        mutex_lock(&subsys->lock);
        __nvmet_passthru_ctrl_disable(subsys);
        mutex_unlock(&subsys->lock);
}

void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
{
        mutex_lock(&subsys->lock);
        __nvmet_passthru_ctrl_disable(subsys);
        mutex_unlock(&subsys->lock);
        kfree(subsys->passthru_ctrl_path);
}