GNU Linux-libre 5.10.153-gnu1
drivers/nvme/target/passthru.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * NVMe Over Fabrics Target Passthrough command implementation.
4  *
5  * Copyright (c) 2017-2018 Western Digital Corporation or its
6  * affiliates.
7  * Copyright (c) 2019-2020, Eideticom Inc.
8  *
9  */
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 #include <linux/module.h>
12
13 #include "../host/nvme.h"
14 #include "nvmet.h"
15
16 MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);
17
18 /*
19  * xarray to maintain one passthru subsystem per nvme controller.
20  */
21 static DEFINE_XARRAY(passthru_subsystems);
22
23 static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
24 {
25         struct nvmet_ctrl *ctrl = req->sq->ctrl;
26         struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
27         u16 status = NVME_SC_SUCCESS;
28         struct nvme_id_ctrl *id;
29         int max_hw_sectors;
30         int page_shift;
31
32         id = kzalloc(sizeof(*id), GFP_KERNEL);
33         if (!id)
34                 return NVME_SC_INTERNAL;
35
36         status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id));
37         if (status)
38                 goto out_free;
39
40         id->cntlid = cpu_to_le16(ctrl->cntlid);
41         id->ver = cpu_to_le32(ctrl->subsys->ver);
42
43         /*
44          * The passthru NVMe driver may have a limit on the number of segments
45          * which depends on the host's memory fragmentation. To solve this,
46          * cap mdts so a transfer never needs more pages than that segment limit.
47          */
48         max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
49                                       pctrl->max_hw_sectors);
50
51         /*
52          * nvmet_passthru_map_sg() is limited to using a single bio, so limit
53          * the mdts based on BIO_MAX_PAGES as well.
54          */
55         max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
56                                       max_hw_sectors);
57
58         page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
59
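           /*
            * mdts is reported as a power of two in units of the minimum
            * memory page size (CAP.MPSMIN), so convert max_hw_sectors from
            * 512-byte units to bytes before taking the log relative to that
            * page size.
            */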
60         id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;
61
62         id->acl = 3;
63         /*
64          * We export the aerl limit of the fabrics controller; update this
65          * when passthru-based aerl support is added.
66          */
67         id->aerl = NVMET_ASYNC_EVENTS - 1;
68
69         /* emulate kas as most PCIe ctrls don't support kas */
70         id->kas = cpu_to_le16(NVMET_KAS);
71
72         /* don't support host memory buffer */
73         id->hmpre = 0;
74         id->hmmin = 0;
75
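           /*
            * NVMe over Fabrics uses fixed 64-byte (2^6) SQEs and 16-byte
            * (2^4) CQEs, so never advertise entry sizes larger than that,
            * whatever the passthru controller reports.
            */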
76         id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
77         id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
78         id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
79
80         /* don't support fused commands */
81         id->fuses = 0;
82
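           /*
            * SGLS: bit 0 advertises SGL support, bit 2 keyed SGL data block
            * descriptors (used by RDMA transports), bit 20 SGL offsets,
            * which in-capsule data relies on.
            */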
83         id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
84         if (ctrl->ops->flags & NVMF_KEYED_SGLS)
85                 id->sgls |= cpu_to_le32(1 << 2);
86         if (req->port->inline_data_size)
87                 id->sgls |= cpu_to_le32(1 << 20);
88
89         /*
90          * When the passthru controller is set up using the nvme-loop transport
91          * it would export the passthru ctrl subsysnqn (PCIe NVMe ctrl) and fail
92          * in nvme/host/core.c in the nvme_init_subsystem()->nvme_active_ctrl()
93          * code path with a duplicate ctrl subsysnqn. To prevent that, we mask
94          * the passthru-ctrl subsysnqn with the target ctrl subsysnqn.
95          */
96         memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn));
97
98         /* use fabric id-ctrl values */
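           /*
            * Both capsule sizes are in 16-byte units: the I/O command capsule
            * holds the 64-byte SQE plus any in-capsule data, the response
            * capsule just the 16-byte CQE.
            */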
99         id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
100                                 req->port->inline_data_size) / 16);
101         id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
102
103         id->msdbd = ctrl->ops->msdbd;
104
105         /* Support multipath connections with fabrics */
106         id->cmic |= 1 << 1;
107
108         /* Disable reservations, see nvmet_parse_passthru_io_cmd() */
109         id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS);
110
111         status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl));
112
113 out_free:
114         kfree(id);
115         return status;
116 }
117
118 static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req)
119 {
120         u16 status = NVME_SC_SUCCESS;
121         struct nvme_id_ns *id;
122         int i;
123
124         id = kzalloc(sizeof(*id), GFP_KERNEL);
125         if (!id)
126                 return NVME_SC_INTERNAL;
127
128         status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns));
129         if (status)
130                 goto out_free;
131
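            /*
             * Zero out any LBA format that carries per-block metadata: the
             * fabrics target cannot pass metadata through, so such formats
             * must not be advertised to the host.
             */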
132         for (i = 0; i < (id->nlbaf + 1); i++)
133                 if (id->lbaf[i].ms)
134                         memset(&id->lbaf[i], 0, sizeof(id->lbaf[i]));
135
136         id->flbas = id->flbas & ~(1 << 4);
137
138         /*
139          * Presently the NVMe-oF target code does not support sending
140          * metadata, so we must disable it here. This should be updated
141          * once the target starts supporting metadata.
142          */
143         id->mc = 0;
144
145         status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
146
147 out_free:
148         kfree(id);
149         return status;
150 }
151
152 static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
153 {
154         struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
155         struct request *rq = req->p.rq;
156         u16 status;
157
158         nvme_execute_passthru_rq(rq);
159
160         status = nvme_req(rq)->status;
161         if (status == NVME_SC_SUCCESS &&
162             req->cmd->common.opcode == nvme_admin_identify) {
163                 switch (req->cmd->identify.cns) {
164                 case NVME_ID_CNS_CTRL:
165                         nvmet_passthru_override_id_ctrl(req);
166                         break;
167                 case NVME_ID_CNS_NS:
168                         nvmet_passthru_override_id_ns(req);
169                         break;
170                 }
171         }
172
173         req->cqe->result = nvme_req(rq)->result;
174         nvmet_req_complete(req, status);
175         blk_mq_free_request(rq);
176 }
177
178 static void nvmet_passthru_req_done(struct request *rq,
179                                     blk_status_t blk_status)
180 {
181         struct nvmet_req *req = rq->end_io_data;
182
183         req->cqe->result = nvme_req(rq)->result;
184         nvmet_req_complete(req, nvme_req(rq)->status);
185         blk_mq_free_request(rq);
186 }
187
188 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
189 {
190         struct scatterlist *sg;
191         int op_flags = 0;
192         struct bio *bio;
193         int i, ret;
194
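            /*
             * Everything is mapped into a single bio, and mdts was capped
             * against BIO_MAX_PAGES in nvmet_passthru_override_id_ctrl(), so
             * a compliant host should never exceed this; reject anything
             * larger.
             */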
195         if (req->sg_cnt > BIO_MAX_PAGES)
196                 return -EINVAL;
197
198         if (req->cmd->common.opcode == nvme_cmd_flush)
199                 op_flags = REQ_FUA;
200         else if (nvme_is_write(req->cmd))
201                 op_flags = REQ_SYNC | REQ_IDLE;
202
203         bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
204         bio->bi_end_io = bio_put;
205         bio->bi_opf = req_op(rq) | op_flags;
206
207         for_each_sg(req->sg, sg, req->sg_cnt, i) {
208                 if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
209                                     sg->offset) < sg->length) {
210                         bio_put(bio);
211                         return -EINVAL;
212                 }
213         }
214
215         ret = blk_rq_append_bio(rq, &bio);
216         if (unlikely(ret)) {
217                 bio_put(bio);
218                 return ret;
219         }
220
221         return 0;
222 }
223
224 static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
225 {
226         struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
227         struct request_queue *q = ctrl->admin_q;
228         struct nvme_ns *ns = NULL;
229         struct request *rq = NULL;
230         u32 effects;
231         u16 status;
232         int ret;
233
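            /*
             * Admin commands (qid 0) go to the passthru controller's admin
             * queue; I/O commands are issued on the request queue of the
             * namespace they address.
             */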
234         if (likely(req->sq->qid != 0)) {
235                 u32 nsid = le32_to_cpu(req->cmd->common.nsid);
236
237                 ns = nvme_find_get_ns(ctrl, nsid);
238                 if (unlikely(!ns)) {
239                         pr_err("failed to get passthru ns nsid:%u\n", nsid);
240                         status = NVME_SC_INVALID_NS | NVME_SC_DNR;
241                         goto out;
242                 }
243
244                 q = ns->queue;
245         }
246
247         rq = nvme_alloc_request(q, req->cmd, 0);
248         if (IS_ERR(rq)) {
249                 status = NVME_SC_INTERNAL;
250                 goto out_put_ns;
251         }
252
253         if (req->sg_cnt) {
254                 ret = nvmet_passthru_map_sg(req, rq);
255                 if (unlikely(ret)) {
256                         status = NVME_SC_INTERNAL;
257                         goto out_put_req;
258                 }
259         }
260
261         /*
262          * If the command we are about to execute has effects, or needs an
263          * end_req function, we must run nvme_execute_passthru_rq()
264          * synchronously in a work item, because the end_req function and
265          * nvme_passthru_end() can't be called from the request done callback,
266          * which typically runs in interrupt context.
267          */
268         effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
269         if (req->p.use_workqueue || effects) {
270                 INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
271                 req->p.rq = rq;
272                 schedule_work(&req->p.work);
273         } else {
274                 rq->end_io_data = req;
275                 blk_execute_rq_nowait(rq->q, ns ? ns->disk : NULL, rq, 0,
276                                       nvmet_passthru_req_done);
277         }
278
279         if (ns)
280                 nvme_put_ns(ns);
281
282         return;
283
284 out_put_req:
285         blk_mq_free_request(rq);
286 out_put_ns:
287         if (ns)
288                 nvme_put_ns(ns);
289 out:
290         nvmet_req_complete(req, status);
291 }
292
293 /*
294  * We need to emulate the set host behaviour feature: the behaviour the
295  * target's host requests must match what the device's host has already
296  * requested, and the command fails otherwise.
297  */
298 static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
299 {
300         struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
301         struct nvme_feat_host_behavior *host;
302         u16 status = NVME_SC_INTERNAL;
303         int ret;
304
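            /*
             * Allocate room for two copies: host[0] receives the passthru
             * controller's current host behaviour setting, host[1] the value
             * the target's host is trying to set; the two must match exactly.
             */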
305         host = kzalloc(sizeof(*host) * 2, GFP_KERNEL);
306         if (!host)
307                 goto out_complete_req;
308
309         ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
310                                 host, sizeof(*host), NULL);
311         if (ret)
312                 goto out_free_host;
313
314         status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host));
315         if (status)
316                 goto out_free_host;
317
318         if (memcmp(&host[0], &host[1], sizeof(host[0]))) {
319                 pr_warn("target host has requested different behaviour from the local host\n");
320                 status = NVME_SC_INTERNAL;
321         }
322
323 out_free_host:
324         kfree(host);
325 out_complete_req:
326         nvmet_req_complete(req, status);
327 }
328
329 static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
330 {
331         req->p.use_workqueue = false;
332         req->execute = nvmet_passthru_execute_cmd;
333         return NVME_SC_SUCCESS;
334 }
335
336 u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
337 {
338         /* Reject any commands with non-SGL flags set (i.e. fused commands) */
339         if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
340                 return NVME_SC_INVALID_FIELD;
341
342         switch (req->cmd->common.opcode) {
343         case nvme_cmd_resv_register:
344         case nvme_cmd_resv_report:
345         case nvme_cmd_resv_acquire:
346         case nvme_cmd_resv_release:
347                 /*
348                  * Reservations cannot be supported properly because the
349                  * underlying device has no way of distinguishing the different
350                  * hosts that connect via fabrics. This could potentially be
351                  * emulated in the future if regular targets grow support for
352                  * this feature.
353                  */
354                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
355         }
356
357         return nvmet_setup_passthru_command(req);
358 }
359
360 /*
361  * Only features that are emulated or specifically allowed in the list are
362  * passed down to the controller. This function implements the allow list for
363  * both get and set features.
364  */
365 static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
366 {
367         switch (le32_to_cpu(req->cmd->features.fid)) {
368         case NVME_FEAT_ARBITRATION:
369         case NVME_FEAT_POWER_MGMT:
370         case NVME_FEAT_LBA_RANGE:
371         case NVME_FEAT_TEMP_THRESH:
372         case NVME_FEAT_ERR_RECOVERY:
373         case NVME_FEAT_VOLATILE_WC:
374         case NVME_FEAT_WRITE_ATOMIC:
375         case NVME_FEAT_AUTO_PST:
376         case NVME_FEAT_TIMESTAMP:
377         case NVME_FEAT_HCTM:
378         case NVME_FEAT_NOPSC:
379         case NVME_FEAT_RRL:
380         case NVME_FEAT_PLM_CONFIG:
381         case NVME_FEAT_PLM_WINDOW:
382         case NVME_FEAT_HOST_BEHAVIOR:
383         case NVME_FEAT_SANITIZE:
384         case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END:
385                 return nvmet_setup_passthru_command(req);
386
387         case NVME_FEAT_ASYNC_EVENT:
388                 /* There is no support for forwarding ASYNC events */
389         case NVME_FEAT_IRQ_COALESCE:
390         case NVME_FEAT_IRQ_CONFIG:
391                 /* The IRQ settings will not apply to the target controller */
392         case NVME_FEAT_HOST_MEM_BUF:
393                 /*
394                  * Any HMB that's set will not be passed through and will
395                  * not work as expected
396                  */
397         case NVME_FEAT_SW_PROGRESS:
398                 /*
399                  * The Pre-Boot Software Load Count doesn't make much
400                  * sense for a target to export
401                  */
402         case NVME_FEAT_RESV_MASK:
403         case NVME_FEAT_RESV_PERSIST:
404                 /* No reservations, see nvmet_parse_passthru_io_cmd() */
405         default:
406                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
407         }
408 }
409
410 u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
411 {
412         /* Reject any commands with non-SGL flags set (i.e. fused commands) */
413         if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
414                 return NVME_SC_INVALID_FIELD;
415
416         /*
417          * Pass through all vendor specific commands
418          */
419         if (req->cmd->common.opcode >= nvme_admin_vendor_start)
420                 return nvmet_setup_passthru_command(req);
421
422         switch (req->cmd->common.opcode) {
423         case nvme_admin_async_event:
424                 req->execute = nvmet_execute_async_event;
425                 return NVME_SC_SUCCESS;
426         case nvme_admin_keep_alive:
427                 /*
428                  * Most PCIe ctrls don't support the keep alive cmd, so we route
429                  * keep alive to the non-passthru mode. Change this code once
430                  * PCIe ctrls with keep alive support become available.
431                  */
432                 req->execute = nvmet_execute_keep_alive;
433                 return NVME_SC_SUCCESS;
434         case nvme_admin_set_features:
435                 switch (le32_to_cpu(req->cmd->features.fid)) {
436                 case NVME_FEAT_ASYNC_EVENT:
437                 case NVME_FEAT_KATO:
438                 case NVME_FEAT_NUM_QUEUES:
439                 case NVME_FEAT_HOST_ID:
440                         req->execute = nvmet_execute_set_features;
441                         return NVME_SC_SUCCESS;
442                 case NVME_FEAT_HOST_BEHAVIOR:
443                         req->execute = nvmet_passthru_set_host_behaviour;
444                         return NVME_SC_SUCCESS;
445                 default:
446                         return nvmet_passthru_get_set_features(req);
447                 }
448                 break;
449         case nvme_admin_get_features:
450                 switch (le32_to_cpu(req->cmd->features.fid)) {
451                 case NVME_FEAT_ASYNC_EVENT:
452                 case NVME_FEAT_KATO:
453                 case NVME_FEAT_NUM_QUEUES:
454                 case NVME_FEAT_HOST_ID:
455                         req->execute = nvmet_execute_get_features;
456                         return NVME_SC_SUCCESS;
457                 default:
458                         return nvmet_passthru_get_set_features(req);
459                 }
460                 break;
461         case nvme_admin_identify:
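                    /*
                     * Identify results for these CNS values may need to be
                     * rewritten after the command completes (see
                     * nvmet_passthru_override_id_ctrl()/_id_ns()), so they are
                     * run from the workqueue rather than the done callback.
                     */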
462                 switch (req->cmd->identify.cns) {
463                 case NVME_ID_CNS_CTRL:
464                         req->execute = nvmet_passthru_execute_cmd;
465                         req->p.use_workqueue = true;
466                         return NVME_SC_SUCCESS;
467                 case NVME_ID_CNS_CS_CTRL:
468                         switch (req->cmd->identify.csi) {
469                         case NVME_CSI_ZNS:
470                                 req->execute = nvmet_passthru_execute_cmd;
471                                 req->p.use_workqueue = true;
472                                 return NVME_SC_SUCCESS;
473                         }
474                         return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
475                 case NVME_ID_CNS_NS:
476                         req->execute = nvmet_passthru_execute_cmd;
477                         req->p.use_workqueue = true;
478                         return NVME_SC_SUCCESS;
479                 case NVME_ID_CNS_CS_NS:
480                         switch (req->cmd->identify.csi) {
481                         case NVME_CSI_ZNS:
482                                 req->execute = nvmet_passthru_execute_cmd;
483                                 req->p.use_workqueue = true;
484                                 return NVME_SC_SUCCESS;
485                         }
486                         return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
487                 default:
488                         return nvmet_setup_passthru_command(req);
489                 }
490         case nvme_admin_get_log_page:
491                 return nvmet_setup_passthru_command(req);
492         default:
493                 /* Reject commands not in the allowlist above */
494                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
495         }
496 }
497
498 int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
499 {
500         struct nvme_ctrl *ctrl;
501         struct file *file;
502         int ret = -EINVAL;
503         void *old;
504
505         mutex_lock(&subsys->lock);
506         if (!subsys->passthru_ctrl_path)
507                 goto out_unlock;
508         if (subsys->passthru_ctrl)
509                 goto out_unlock;
510
511         if (subsys->nr_namespaces) {
512                 pr_info("cannot enable both passthru and regular namespaces for a single subsystem\n");
513                 goto out_unlock;
514         }
515
516         file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
517         if (IS_ERR(file)) {
518                 ret = PTR_ERR(file);
519                 goto out_unlock;
520         }
521
522         ctrl = nvme_ctrl_from_file(file);
523         if (!ctrl) {
524                 pr_err("failed to open nvme controller %s\n",
525                        subsys->passthru_ctrl_path);
526
527                 goto out_put_file;
528         }
529
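            /*
             * Claim the controller for this subsystem: the xarray is keyed by
             * cntlid and xa_cmpxchg() only stores the new entry if no other
             * passthru subsystem has registered the controller already.
             */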
530         old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
531                          subsys, GFP_KERNEL);
532         if (xa_is_err(old)) {
533                 ret = xa_err(old);
534                 goto out_put_file;
535         }
536
537         if (old)
538                 goto out_put_file;
539
540         subsys->passthru_ctrl = ctrl;
541         subsys->ver = ctrl->vs;
542
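            /*
             * NVMe over Fabrics is based on NVMe 1.2.1, so never advertise an
             * older version than that to the host.
             */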
543         if (subsys->ver < NVME_VS(1, 2, 1)) {
544                 pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n",
545                         NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver),
546                         NVME_TERTIARY(subsys->ver));
547                 subsys->ver = NVME_VS(1, 2, 1);
548         }
549         nvme_get_ctrl(ctrl);
550         __module_get(subsys->passthru_ctrl->ops->module);
551         ret = 0;
552
553 out_put_file:
554         filp_close(file, NULL);
555 out_unlock:
556         mutex_unlock(&subsys->lock);
557         return ret;
558 }
559
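    /* Caller must hold subsys->lock. */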
560 static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
561 {
562         if (subsys->passthru_ctrl) {
563                 xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
564                 module_put(subsys->passthru_ctrl->ops->module);
565                 nvme_put_ctrl(subsys->passthru_ctrl);
566         }
567         subsys->passthru_ctrl = NULL;
568         subsys->ver = NVMET_DEFAULT_VS;
569 }
570
571 void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
572 {
573         mutex_lock(&subsys->lock);
574         __nvmet_passthru_ctrl_disable(subsys);
575         mutex_unlock(&subsys->lock);
576 }
577
578 void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
579 {
580         mutex_lock(&subsys->lock);
581         __nvmet_passthru_ctrl_disable(subsys);
582         mutex_unlock(&subsys->lock);
583         kfree(subsys->passthru_ctrl_path);
584 }