// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/libnvdimm.h>
#include <asm/unaligned.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/ndctl.h>
#include <linux/async.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "cxlmem.h"
#include "cxl.h"

/*
 * Ordered workqueue for cxl nvdimm device arrival and departure
 * to coordinate bus rescans when a bridge arrives and trigger remove
 * operations when the bridge is removed.
 */
static struct workqueue_struct *cxl_pmem_wq;

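/*
 * Commands that mutate the label storage area (LSA) must not race the
 * kernel's own label updates, so they are withheld from the userspace
 * raw command path while the cxl_nvdimm driver is bound.
 * cxl_pmem_init() latches CXL_MEM_COMMAND_ID_SET_LSA and
 * CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE into this bitmap, and
 * cxl_nvdimm_probe() applies it per device.
 */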
static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);

static void clear_exclusive(void *cxlds)
{
	clear_exclusive_cxl_commands(cxlds, exclusive_cmds);
}

static void unregister_nvdimm(void *nvdimm)
{
	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
	struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
	struct cxl_pmem_region *cxlr_pmem;
	unsigned long index;

	device_lock(&cxl_nvb->dev);
	dev_set_drvdata(&cxl_nvd->dev, NULL);
	xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) {
		get_device(&cxlr_pmem->dev);
		device_unlock(&cxl_nvb->dev);

		/*
		 * Drop the bridge lock while detaching the region since
		 * region teardown takes the bridge lock itself.
		 */
		device_release_driver(&cxlr_pmem->dev);
		put_device(&cxlr_pmem->dev);

		device_lock(&cxl_nvb->dev);
	}
	device_unlock(&cxl_nvb->dev);

	nvdimm_delete(nvdimm);
	cxl_nvd->bridge = NULL;
}

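/*
 * Probe a cxl_nvdimm device: look up the nvdimm bridge, reserve the
 * label commands for kernel use, and register an nvdimm on the
 * bridge's nvdimm_bus with labeling enabled and the config-data
 * command set advertised.
 */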
static int cxl_nvdimm_probe(struct device *dev)
{
	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
	unsigned long flags = 0, cmd_mask = 0;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct nvdimm *nvdimm;
	int rc;

	cxl_nvb = cxl_find_nvdimm_bridge(dev);
	if (!cxl_nvb)
		return -ENXIO;

	device_lock(&cxl_nvb->dev);
	if (!cxl_nvb->nvdimm_bus) {
		rc = -ENXIO;
		goto out;
	}

	set_exclusive_cxl_commands(cxlds, exclusive_cmds);
	rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds);
	if (rc)
		goto out;

	set_bit(NDD_LABELING, &flags);
	set_bit(NDD_REGISTER_SYNC, &flags);
	set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
	set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
	set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
	nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags,
			       cmd_mask, 0, NULL);
	if (!nvdimm) {
		rc = -ENOMEM;
		goto out;
	}

	dev_set_drvdata(dev, nvdimm);
	cxl_nvd->bridge = cxl_nvb;
	rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
out:
	device_unlock(&cxl_nvb->dev);
	put_device(&cxl_nvb->dev);

	return rc;
}

static struct cxl_driver cxl_nvdimm_driver = {
	.name = "cxl_nvdimm",
	.probe = cxl_nvdimm_probe,
	.id = CXL_DEVICE_NVDIMM,
};

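/*
 * The helpers below translate the generic LIBNVDIMM config commands
 * into CXL mailbox operations: ND_CMD_GET_CONFIG_SIZE reports the LSA
 * geometry, and ND_CMD_{GET,SET}_CONFIG_DATA map to the
 * CXL_MBOX_OP_{GET,SET}_LSA opcodes, with max_xfer sized so that a
 * Set LSA header plus data always fits in one mailbox payload.
 */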
static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
				    struct nd_cmd_get_config_size *cmd,
				    unsigned int buf_len)
{
	if (sizeof(*cmd) > buf_len)
		return -EINVAL;

	*cmd = (struct nd_cmd_get_config_size) {
		.config_size = cxlds->lsa_size,
		.max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa),
	};

	return 0;
}

static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
				    struct nd_cmd_get_config_data_hdr *cmd,
				    unsigned int buf_len)
{
	struct cxl_mbox_get_lsa get_lsa;
	int rc;

	if (sizeof(*cmd) > buf_len)
		return -EINVAL;
	if (struct_size(cmd, out_buf, cmd->in_length) > buf_len)
		return -EINVAL;

	get_lsa = (struct cxl_mbox_get_lsa) {
		.offset = cpu_to_le32(cmd->in_offset),
		.length = cpu_to_le32(cmd->in_length),
	};

	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa,
			       sizeof(get_lsa), cmd->out_buf, cmd->in_length);
	cmd->status = 0;

	return rc;
}

static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
				    struct nd_cmd_set_config_hdr *cmd,
				    unsigned int buf_len)
{
	struct cxl_mbox_set_lsa *set_lsa;
	int rc;

	if (sizeof(*cmd) > buf_len)
		return -EINVAL;

	/* 4-byte status follows the input data in the payload */
	if (size_add(struct_size(cmd, in_buf, cmd->in_length), 4) > buf_len)
		return -EINVAL;

	set_lsa =
		kvzalloc(struct_size(set_lsa, data, cmd->in_length), GFP_KERNEL);
	if (!set_lsa)
		return -ENOMEM;

	*set_lsa = (struct cxl_mbox_set_lsa) {
		.offset = cpu_to_le32(cmd->in_offset),
	};
	memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);

	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa,
			       struct_size(set_lsa, data, cmd->in_length),
			       NULL, 0);

	/*
	 * Set "firmware" status (4-packed bytes at the end of the input
	 * payload).
	 */
	put_unaligned(0, (u32 *) &cmd->in_buf[cmd->in_length]);
	kvfree(set_lsa);

	return rc;
}

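/*
 * Command dispatch for the nvdimm_bus ioctl path. Userspace reaches
 * these handlers through the LIBNVDIMM character devices, e.g. when a
 * tool like ndctl reads or writes namespace labels; only commands
 * advertised in cmd_mask at nvdimm_create() time are let through.
 */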
static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
			       void *buf, unsigned int buf_len)
{
	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
	unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!test_bit(cmd, &cmd_mask))
		return -ENOTTY;

	switch (cmd) {
	case ND_CMD_GET_CONFIG_SIZE:
		return cxl_pmem_get_config_size(cxlds, buf, buf_len);
	case ND_CMD_GET_CONFIG_DATA:
		return cxl_pmem_get_config_data(cxlds, buf, buf_len);
	case ND_CMD_SET_CONFIG_DATA:
		return cxl_pmem_set_config_data(cxlds, buf, buf_len);
	default:
		return -ENOTTY;
	}
}

static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
			struct nvdimm *nvdimm, unsigned int cmd, void *buf,
			unsigned int buf_len, int *cmd_rc)
{
	/*
	 * No firmware response to translate, let the transport error
	 * code take precedence.
	 */
	*cmd_rc = 0;

	if (!nvdimm)
		return -ENOTTY;
	return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}

static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
{
	if (cxl_nvb->nvdimm_bus)
		return true;
	cxl_nvb->nvdimm_bus =
		nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
	return cxl_nvb->nvdimm_bus != NULL;
}

static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
{
	struct cxl_nvdimm *cxl_nvd;

	if (!is_cxl_nvdimm(dev))
		return 0;

	cxl_nvd = to_cxl_nvdimm(dev);
	if (cxl_nvd->bridge != cxl_nvb)
		return 0;

	device_release_driver(dev);
	return 0;
}

static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
{
	struct cxl_pmem_region *cxlr_pmem;

	if (!is_cxl_pmem_region(dev))
		return 0;

	cxlr_pmem = to_cxl_pmem_region(dev);
	if (cxlr_pmem->bridge != cxl_nvb)
		return 0;

	device_release_driver(dev);
	return 0;
}

static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
			       struct nvdimm_bus *nvdimm_bus)
{
	if (!nvdimm_bus)
		return;

	/*
	 * Set the state of cxl_nvdimm devices to unbound / idle before
	 * nvdimm_bus_unregister() rips the nvdimm objects out from
	 * underneath them.
	 */
	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
			 cxl_pmem_region_release_driver);
	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
			 cxl_nvdimm_release_driver);
	nvdimm_bus_unregister(nvdimm_bus);
}

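/*
 * Bridge state machine: a bridge starts out CXL_NVB_NEW, goes
 * CXL_NVB_ONLINE at probe time (registering an nvdimm_bus and
 * rescanning for cxl_nvdimm devices that arrived early), and
 * CXL_NVB_OFFLINE or CXL_NVB_DEAD on the way down (tearing the
 * nvdimm_bus back out). All transitions funnel through the ordered
 * cxl_pmem_wq so arrival and departure work cannot race.
 */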
static void cxl_nvb_update_state(struct work_struct *work)
{
	struct cxl_nvdimm_bridge *cxl_nvb =
		container_of(work, typeof(*cxl_nvb), state_work);
	struct nvdimm_bus *victim_bus = NULL;
	bool release = false, rescan = false;

	device_lock(&cxl_nvb->dev);
	switch (cxl_nvb->state) {
	case CXL_NVB_ONLINE:
		if (!online_nvdimm_bus(cxl_nvb)) {
			dev_err(&cxl_nvb->dev,
				"failed to establish nvdimm bus\n");
			release = true;
		} else
			rescan = true;
		break;
	case CXL_NVB_OFFLINE:
	case CXL_NVB_DEAD:
		victim_bus = cxl_nvb->nvdimm_bus;
		cxl_nvb->nvdimm_bus = NULL;
		break;
	default:
		break;
	}
	device_unlock(&cxl_nvb->dev);

	if (release)
		device_release_driver(&cxl_nvb->dev);
	if (rescan) {
		int rc = bus_rescan_devices(&cxl_bus_type);

		dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
	}
	offline_nvdimm_bus(cxl_nvb, victim_bus);

	put_device(&cxl_nvb->dev);
}

static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
{
	/*
	 * Take a reference that the workqueue will drop if new work
	 * gets queued.
	 */
	get_device(&cxl_nvb->dev);
	if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
		put_device(&cxl_nvb->dev);
}

static void cxl_nvdimm_bridge_remove(struct device *dev)
{
	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);

	if (cxl_nvb->state == CXL_NVB_ONLINE)
		cxl_nvb->state = CXL_NVB_OFFLINE;
	cxl_nvdimm_bridge_state_work(cxl_nvb);
}

static int cxl_nvdimm_bridge_probe(struct device *dev)
{
	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);

	if (cxl_nvb->state == CXL_NVB_DEAD)
		return -ENXIO;

	if (cxl_nvb->state == CXL_NVB_NEW) {
		cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
			.provider_name = "CXL",
			.module = THIS_MODULE,
			.ndctl = cxl_pmem_ctl,
		};

		INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state);
	}

	cxl_nvb->state = CXL_NVB_ONLINE;
	cxl_nvdimm_bridge_state_work(cxl_nvb);

	return 0;
}

static struct cxl_driver cxl_nvdimm_bridge_driver = {
	.name = "cxl_nvdimm_bridge",
	.probe = cxl_nvdimm_bridge_probe,
	.remove = cxl_nvdimm_bridge_remove,
	.id = CXL_DEVICE_NVDIMM_BRIDGE,
};

static int match_cxl_nvdimm(struct device *dev, void *data)
{
	return is_cxl_nvdimm(dev);
}

static void unregister_nvdimm_region(void *nd_region)
{
	nvdimm_region_delete(nd_region);
}

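/*
 * Each cxl_nvdimm tracks the pmem regions it participates in via an
 * xarray keyed by the region pointer itself, holding a device
 * reference per entry. unregister_nvdimm() walks this xarray to force
 * regions down before the backing nvdimm disappears.
 */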
static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd,
				 struct cxl_pmem_region *cxlr_pmem)
{
	int rc;

	rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem,
		       cxlr_pmem, GFP_KERNEL);
	if (rc)
		return rc;

	get_device(&cxlr_pmem->dev);
	return 0;
}

static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd,
				  struct cxl_pmem_region *cxlr_pmem)
{
	/*
	 * It is possible this is called without a corresponding
	 * cxl_nvdimm_add_region for @cxlr_pmem
	 */
	cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem);
	if (cxlr_pmem)
		put_device(&cxlr_pmem->dev);
}

static void release_mappings(void *data)
{
	int i;
	struct cxl_pmem_region *cxlr_pmem = data;
	struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge;

	device_lock(&cxl_nvb->dev);
	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
		struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;

		cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem);
	}
	device_unlock(&cxl_nvb->dev);
}

static void cxlr_pmem_remove_resource(void *res)
{
	remove_resource(res);
}

struct cxl_pmem_region_info {
	u64 offset;
	u64 serial;
};

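/*
 * Assemble an nd_region from a cxl_pmem_region: claim the host
 * physical address range in iomem_resource, resolve each mapping's
 * cxl_nvdimm to its registered nvdimm, derive an interleave-set
 * cookie from the per-mapping {offset, serial} pairs, and register
 * the resulting pmem region on the bridge's nvdimm_bus. Failures are
 * unwound via the devm actions installed along the way.
 */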
static int cxl_pmem_region_probe(struct device *dev)
{
	struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
	struct cxl_region *cxlr = cxlr_pmem->cxlr;
	struct cxl_pmem_region_info *info = NULL;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct nd_interleave_set *nd_set;
	struct nd_region_desc ndr_desc;
	struct cxl_nvdimm *cxl_nvd;
	struct nvdimm *nvdimm;
	struct resource *res;
	int rc, i = 0;

	cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
	if (!cxl_nvb) {
		dev_dbg(dev, "bridge not found\n");
		return -ENXIO;
	}
	cxlr_pmem->bridge = cxl_nvb;

	device_lock(&cxl_nvb->dev);
	if (!cxl_nvb->nvdimm_bus) {
		dev_dbg(dev, "nvdimm bus not found\n");
		rc = -ENXIO;
		goto out_nvb;
	}

	memset(&mappings, 0, sizeof(mappings));
	memset(&ndr_desc, 0, sizeof(ndr_desc));

	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
	if (!res) {
		rc = -ENOMEM;
		goto out_nvb;
	}

	res->name = "Persistent Memory";
	res->start = cxlr_pmem->hpa_range.start;
	res->end = cxlr_pmem->hpa_range.end;
	res->flags = IORESOURCE_MEM;
	res->desc = IORES_DESC_PERSISTENT_MEMORY;

	rc = insert_resource(&iomem_resource, res);
	if (rc)
		goto out_nvb;

	rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
	if (rc)
		goto out_nvb;

	ndr_desc.res = res;
	ndr_desc.provider_data = cxlr_pmem;

	ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
	ndr_desc.target_node = phys_to_target_node(res->start);
	if (ndr_desc.target_node == NUMA_NO_NODE) {
		ndr_desc.target_node = ndr_desc.numa_node;
		dev_dbg(&cxlr->dev, "changing target node from %d to %d",
			NUMA_NO_NODE, ndr_desc.target_node);
	}

	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
	if (!nd_set) {
		rc = -ENOMEM;
		goto out_nvb;
	}

	ndr_desc.memregion = cxlr->id;
	set_bit(ND_REGION_CXL, &ndr_desc.flags);
	set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);

	info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto out_nvb;
	}

	rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem);
	if (rc)
		goto out_nvd;
	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
		struct cxl_memdev *cxlmd = m->cxlmd;
		struct cxl_dev_state *cxlds = cxlmd->cxlds;
		struct device *d;

		d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
		if (!d) {
			dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
				dev_name(&cxlmd->dev));
			rc = -ENODEV;
			goto out_nvd;
		}

		/* safe to drop ref now with bridge lock held */
		put_device(d);

		cxl_nvd = to_cxl_nvdimm(d);
		nvdimm = dev_get_drvdata(&cxl_nvd->dev);
		if (!nvdimm) {
			dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
				dev_name(&cxlmd->dev));
			rc = -ENODEV;
			goto out_nvd;
		}

		/*
		 * Pin the region per nvdimm device as those may be released
		 * out-of-order with respect to the region, and a single nvdimm
		 * may be associated with multiple regions
		 */
		rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem);
		if (rc)
			goto out_nvd;
		m->cxl_nvd = cxl_nvd;
		mappings[i] = (struct nd_mapping_desc) {
			.nvdimm = nvdimm,
			.start = m->start,
			.size = m->size,
			.position = i,
		};
		info[i].offset = m->start;
		info[i].serial = cxlds->serial;
	}
	ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
	ndr_desc.mapping = mappings;

	/*
	 * TODO enable CXL labels which skip the need for 'interleave-set cookie'
	 */
	nd_set->cookie1 =
		nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
	nd_set->cookie2 = nd_set->cookie1;
	ndr_desc.nd_set = nd_set;

	cxlr_pmem->nd_region =
		nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
	if (!cxlr_pmem->nd_region) {
		rc = -ENOMEM;
		goto out_nvd;
	}

	rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
				      cxlr_pmem->nd_region);
out_nvd:
	kfree(info);
out_nvb:
	device_unlock(&cxl_nvb->dev);
	put_device(&cxl_nvb->dev);

	return rc;
}

static struct cxl_driver cxl_pmem_region_driver = {
	.name = "cxl_pmem_region",
	.probe = cxl_pmem_region_probe,
	.id = CXL_DEVICE_PMEM_REGION,
};

/*
 * Return all bridges to the CXL_NVB_NEW state to invalidate any
 * ->state_work referring to the now destroyed cxl_pmem_wq.
 */
static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
{
	struct cxl_nvdimm_bridge *cxl_nvb;

	if (!is_cxl_nvdimm_bridge(dev))
		return 0;

	cxl_nvb = to_cxl_nvdimm_bridge(dev);
	device_lock(dev);
	cxl_nvb->state = CXL_NVB_NEW;
	device_unlock(dev);

	return 0;
}

static void destroy_cxl_pmem_wq(void)
{
	destroy_workqueue(cxl_pmem_wq);
	bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset);
}

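/*
 * Module init registers the three drivers in dependency order
 * (bridge, then nvdimm, then region) and unwinds in reverse on
 * failure; module exit mirrors that teardown. The exclusive command
 * bits are latched once here, before any cxl_nvdimm can bind.
 */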
static __init int cxl_pmem_init(void)
{
	int rc;

	set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
	set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);

	cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
	if (!cxl_pmem_wq)
		return -ENXIO;

	rc = cxl_driver_register(&cxl_nvdimm_bridge_driver);
	if (rc)
		goto err_bridge;

	rc = cxl_driver_register(&cxl_nvdimm_driver);
	if (rc)
		goto err_nvdimm;

	rc = cxl_driver_register(&cxl_pmem_region_driver);
	if (rc)
		goto err_region;

	return 0;

err_region:
	cxl_driver_unregister(&cxl_nvdimm_driver);
err_nvdimm:
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
err_bridge:
	destroy_cxl_pmem_wq();
	return rc;
}

static __exit void cxl_pmem_exit(void)
{
	cxl_driver_unregister(&cxl_pmem_region_driver);
	cxl_driver_unregister(&cxl_nvdimm_driver);
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
	destroy_cxl_pmem_wq();
}

MODULE_LICENSE("GPL v2");
module_init(cxl_pmem_init);
module_exit(cxl_pmem_exit);
MODULE_IMPORT_NS(CXL);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM);
MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION);