1 // SPDX-License-Identifier: GPL-2.0-only
3 /* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
4 /* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
6 #include <linux/delay.h>
7 #include <linux/dma-mapping.h>
9 #include <linux/interrupt.h>
10 #include <linux/list.h>
11 #include <linux/kobject.h>
12 #include <linux/kref.h>
13 #include <linux/mhi.h>
14 #include <linux/module.h>
15 #include <linux/msi.h>
16 #include <linux/mutex.h>
17 #include <linux/pci.h>
18 #include <linux/spinlock.h>
19 #include <linux/workqueue.h>
20 #include <linux/wait.h>
21 #include <drm/drm_accel.h>
22 #include <drm/drm_drv.h>
23 #include <drm/drm_file.h>
24 #include <drm/drm_gem.h>
25 #include <drm/drm_ioctl.h>
26 #include <drm/drm_managed.h>
27 #include <uapi/drm/qaic_accel.h>
29 #include "mhi_controller.h"
31 #include "qaic_timesync.h"
/* Import the DMA_BUF symbol namespace for this module. */
33 MODULE_IMPORT_NS(DMA_BUF);
/* PCI device ID used in the qaic_ids match table and tested in create_qdev(). */
35 #define PCI_DEV_AIC100 0xa100
/* Driver name and description; QAIC_DESC is reused by the MODULE_* macros at the end of the file. */
36 #define QAIC_NAME "qaic"
37 #define QAIC_DESC "Qualcomm Cloud AI Accelerators"
/*
 * Boolean module parameter exposed with mode 0400. init_msi() reads it to
 * decide whether to record/disable the per-DBC interrupt and set up the
 * polling work item.
 */
41 bool datapath_polling;
42 module_param(datapath_polling, bool, 0400);
43 MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
/* ID allocator for user handles: allocated in qaic_open(), released in free_usr(). */
45 static DEFINE_IDA(qaic_usrs);
/*
 * kref release callback for a struct qaic_user; passed to kref_put() by
 * qaic_postclose() and qaic_destroy_drm_device().
 * @kref: the ref_count member embedded in the user being released.
 *
 * NOTE(review): this listing omits lines from the function; any step not
 * shown below (for example freeing the structure itself) cannot be confirmed
 * from here.
 */
47 static void free_usr(struct kref *kref)
/* Recover the enclosing user from its embedded reference counter. */
49 struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
/* Tear down the per-user SRCU struct, then hand the handle back to the IDA. */
51 cleanup_srcu_struct(&usr->qddev_lock);
52 ida_free(&qaic_usrs, usr->handle);
/*
 * Create the per-file struct qaic_user for a newly opened DRM file.
 * @dev:  DRM device being opened; converted to its qaic_drm_device.
 * @file: DRM file; on the visible success path its driver_priv is set to the
 *        new user.
 *
 * NOTE(review): presumably installed as the drm_driver .open callback, but
 * that assignment is not visible in this listing. Local declarations, error
 * branches, labels and return statements are also missing, so only the
 * statements shown are described.
 */
56 static int qaic_open(struct drm_device *dev, struct drm_file *file)
58 struct qaic_drm_device *qddev = to_qaic_drm_device(dev);
59 struct qaic_device *qdev = qddev->qdev;
60 struct qaic_user *usr;
/* Enter the device SRCU read section before testing that the device is online. */
64 rcu_id = srcu_read_lock(&qdev->dev_lock);
65 if (qdev->dev_state != QAIC_ONLINE) {
/* Allocate the user (not zeroed: kmalloc) and give it a handle from the qaic_usrs IDA. */
70 usr = kmalloc(sizeof(*usr), GFP_KERNEL);
76 usr->handle = ida_alloc(&qaic_usrs, GFP_KERNEL);
/* ida_alloc() reports failure as a negative value. */
77 if (usr->handle < 0) {
/* Initialise the user's chunk counter, its SRCU struct and its reference count. */
82 atomic_set(&usr->chunk_id, 0);
83 init_srcu_struct(&usr->qddev_lock);
84 kref_init(&usr->ref_count);
/* Link the user into the device's user list under users_mutex; the lock wait is interruptible. */
86 ret = mutex_lock_interruptible(&qddev->users_mutex);
90 list_add(&usr->node, &qddev->users);
91 mutex_unlock(&qddev->users_mutex);
/* Attach the user to the file and leave the device SRCU read section. */
93 file->driver_priv = usr;
95 srcu_read_unlock(&qdev->dev_lock, rcu_id);
/*
 * The statements below undo the SRCU init and the handle allocation and
 * release the device SRCU read lock. Presumably this is the error unwind
 * path; the labels are not visible -- TODO confirm.
 */
99 cleanup_srcu_struct(&usr->qddev_lock);
100 ida_free(&qaic_usrs, usr->handle);
104 srcu_read_unlock(&qdev->dev_lock, rcu_id);
/*
 * drm_driver .postclose callback (see qaic_accel_driver): releases what the
 * closing file's user holds and drops the file's reference on the user.
 * @dev:  DRM device the file belonged to.
 * @file: DRM file being closed; driver_priv holds the struct qaic_user.
 *
 * NOTE(review): the assignments of qddev and qdev, other local declarations
 * and any conditionals around them are not visible in this listing.
 */
108 static void qaic_postclose(struct drm_device *dev, struct drm_file *file)
110 struct qaic_user *usr = file->driver_priv;
111 struct qaic_drm_device *qddev;
112 struct qaic_device *qdev;
/* Enter the user's SRCU read section; qaic_destroy_drm_device() synchronizes against it. */
118 usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
/*
 * Under the device SRCU read lock, and only while the device is online:
 * release the user's resources and every DBC whose current user has the
 * same handle as this one.
 */
121 qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
122 if (qdev->dev_state == QAIC_ONLINE) {
123 qaic_release_usr(qdev, usr);
124 for (i = 0; i < qdev->num_dbc; ++i)
125 if (qdev->dbc[i].usr && qdev->dbc[i].usr->handle == usr->handle)
126 release_dbc(qdev, i);
128 srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
/*
 * Unlink the user from the device's list if it is still linked;
 * qaic_destroy_drm_device() may already have removed it with list_del_init().
 */
130 mutex_lock(&qddev->users_mutex);
131 if (!list_empty(&usr->node))
132 list_del_init(&usr->node);
133 mutex_unlock(&qddev->users_mutex);
/* Leave the user's SRCU read section and drop a reference; free_usr() runs on the last put. */
136 srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
137 kref_put(&usr->ref_count, free_usr);
139 file->driver_priv = NULL;
/* Define qaic_accel_fops, which qaic_accel_driver installs as its .fops. */
142 DEFINE_DRM_ACCEL_FOPS(qaic_accel_fops);
/*
 * Driver ioctl table referenced by qaic_accel_driver.ioctls. Each entry maps
 * one QAIC ioctl to its handler; every entry passes 0 as the flags argument.
 * The handlers are defined outside this file section.
 */
144 static const struct drm_ioctl_desc qaic_drm_ioctls[] = {
145 DRM_IOCTL_DEF_DRV(QAIC_MANAGE, qaic_manage_ioctl, 0),
146 DRM_IOCTL_DEF_DRV(QAIC_CREATE_BO, qaic_create_bo_ioctl, 0),
147 DRM_IOCTL_DEF_DRV(QAIC_MMAP_BO, qaic_mmap_bo_ioctl, 0),
148 DRM_IOCTL_DEF_DRV(QAIC_ATTACH_SLICE_BO, qaic_attach_slice_bo_ioctl, 0),
149 DRM_IOCTL_DEF_DRV(QAIC_EXECUTE_BO, qaic_execute_bo_ioctl, 0),
150 DRM_IOCTL_DEF_DRV(QAIC_PARTIAL_EXECUTE_BO, qaic_partial_execute_bo_ioctl, 0),
151 DRM_IOCTL_DEF_DRV(QAIC_WAIT_BO, qaic_wait_bo_ioctl, 0),
152 DRM_IOCTL_DEF_DRV(QAIC_PERF_STATS_BO, qaic_perf_stats_bo_ioctl, 0),
153 DRM_IOCTL_DEF_DRV(QAIC_DETACH_SLICE_BO, qaic_detach_slice_bo_ioctl, 0),
/*
 * DRM driver description handed to devm_drm_dev_alloc() in create_qdev().
 * NOTE(review): some initializer lines are missing from this listing; only
 * the members shown are described.
 */
156 static const struct drm_driver qaic_accel_driver = {
/* A GEM-capable compute accelerator device. */
157 .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
163 .fops = &qaic_accel_fops,
/* Per-file teardown; see qaic_postclose(). */
165 .postclose = qaic_postclose,
167 .ioctls = qaic_drm_ioctls,
168 .num_ioctls = ARRAY_SIZE(qaic_drm_ioctls),
169 .gem_prime_import = qaic_gem_prime_import,
/*
 * Record the partition id on the device's qaic_drm_device and register the
 * DRM device.
 * @qdev:         device whose qddev is registered.
 * @partition_id: compared against QAIC_NO_PARTITION before use; the only
 *                caller visible here, qaic_pci_probe(), passes
 *                QAIC_NO_PARTITION.
 *
 * NOTE(review): the body of the partition check and the return statements
 * are not visible in this listing.
 */
172 static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
174 struct qaic_drm_device *qddev = qdev->qddev;
175 struct drm_device *drm = to_drm(qddev);
178 /* Hold off implementing partitions until the uapi is determined */
179 if (partition_id != QAIC_NO_PARTITION)
182 qddev->partition_id = partition_id;
/* Register with DRM; a failure is logged at debug level only. */
184 ret = drm_dev_register(drm, 0);
186 pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret);
/*
 * Unregister the DRM device and detach every user still on its user list.
 * @qdev:         device whose qddev is torn down.
 * @partition_id: not referenced by any statement visible in this listing.
 *
 * NOTE(review): the delimiters of the original block comment and at least
 * one statement inside the loop are missing from this listing.
 */
191 static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
193 struct qaic_drm_device *qddev = qdev->qddev;
194 struct drm_device *drm = to_drm(qddev);
195 struct qaic_user *usr;
197 drm_dev_unregister(drm);
198 qddev->partition_id = 0;
200 * Existing users get unresolvable errors till they close FDs.
201 * Need to sync carefully with users calling close(). The
202 * list of users can be modified elsewhere when the lock isn't
203 * held here, but the sync'ing the srcu with the mutex held
204 * could deadlock. Grab the mutex so that the list will be
205 * unmodified. The user we get will exist as long as the
206 * lock is held. Signal that the qcdev is going away, and
207 * grab a reference to the user so they don't go away for
208 * synchronize_srcu(). Then release the mutex to avoid
209 * deadlock and make sure the user has observed the signal.
210 * With the lock released, we cannot maintain any state of the
/*
 * Drain the list one user at a time. Each iteration unlinks the first user
 * and takes a reference while users_mutex is held, drops the mutex for
 * synchronize_srcu() on that user's qddev_lock, releases the reference, and
 * re-takes the mutex before re-testing the list.
 */
213 mutex_lock(&qddev->users_mutex);
214 while (!list_empty(&qddev->users)) {
215 usr = list_first_entry(&qddev->users, struct qaic_user, node);
216 list_del_init(&usr->node);
217 kref_get(&usr->ref_count);
219 mutex_unlock(&qddev->users_mutex);
220 synchronize_srcu(&usr->qddev_lock);
221 kref_put(&usr->ref_count, free_usr);
222 mutex_lock(&qddev->users_mutex);
224 mutex_unlock(&qddev->users_mutex);
/*
 * MHI driver .probe callback (see qaic_mhi_driver): opens the control
 * channel, checks the control protocol version and marks the device online.
 * @mhi_dev: MHI device for the matched channel.
 * @id:      matched table entry; not referenced by any visible statement.
 *
 * NOTE(review): error branches, labels and return statements are missing
 * from this listing.
 */
227 static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
/* Both start as -1 converted to u16, so an unset version cannot match CNTL_MAJOR by accident unless it is 0xffff. */
229 u16 major = -1, minor = -1;
230 struct qaic_device *qdev;
234 * Invoking this function indicates that the control channel to the
235 * device is available. We use that as a signal to indicate that
236 * the device side firmware has booted. The device side firmware
237 * manages the device resources, so we need to communicate with it
238 * via the control channel in order to utilize the device. Therefore
239 * we wait until this signal to create the drm dev that userspace will
240 * use to control the device, because without the device side firmware,
241 * userspace can't do anything useful.
/* Find the qaic_device through the MHI controller's underlying PCI device and cross-link the two. */
244 qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
246 dev_set_drvdata(&mhi_dev->dev, qdev);
247 qdev->cntl_ch = mhi_dev;
249 ret = qaic_control_open(qdev);
251 pci_dbg(qdev->pdev, "%s: control_open failed %d\n", __func__, ret);
/*
 * Move to the BOOT state and query the control protocol version. The version
 * is rejected when the query fails, the major number differs from
 * CNTL_MAJOR, or the minor number is greater than CNTL_MINOR.
 */
255 qdev->dev_state = QAIC_BOOT;
256 ret = get_cntl_version(qdev, NULL, &major, &minor);
257 if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) {
258 pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n",
259 __func__, major, minor, CNTL_MAJOR, CNTL_MINOR, ret);
/* Mark the device online and emit a KOBJ_ONLINE uevent on the accel device's kobject. */
263 qdev->dev_state = QAIC_ONLINE;
264 kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE);
/* Presumably the unwind path for a failure after qaic_control_open(); the label is not visible -- TODO confirm. */
269 qaic_control_close(qdev);
/*
 * MHI driver .remove callback (see qaic_mhi_driver).
 * NOTE(review): no statements of the body are visible in this listing, only
 * the comment below.
 */
273 static void qaic_mhi_remove(struct mhi_device *mhi_dev)
275 /* This is redundant since we have already observed the device crash */
/*
 * Announce that the device is going offline and wait for in-flight readers.
 * Called from qaic_dev_reset_clean_local_state() and qaic_pci_reset_prepare().
 * @qdev: device being reset.
 *
 * NOTE(review): the statement following the wake-up comment and the body of
 * the per-DBC loop are missing from this listing.
 */
278 static void qaic_notify_reset(struct qaic_device *qdev)
/* Emit KOBJ_OFFLINE on the accel device's kobject, then record the offline state. */
282 kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE);
283 qdev->dev_state = QAIC_OFFLINE;
284 /* wake up any waiters to avoid waiting for timeouts at sync */
286 for (i = 0; i < qdev->num_dbc; ++i)
/* Block until every current SRCU reader of dev_lock has left its read section. */
288 synchronize_srcu(&qdev->dev_lock);
/*
 * Put the device into the offline state and release every DBC.
 * Non-static: callable from outside this file. Within this file it is used
 * by qaic_pci_remove() and qaic_pci_reset_prepare().
 * @qdev: device whose local state is cleaned up.
 */
291 void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
/* Notify and synchronize first; see qaic_notify_reset(). */
295 qaic_notify_reset(qdev);
297 /* start tearing things down */
298 for (i = 0; i < qdev->num_dbc; ++i)
299 release_dbc(qdev, i);
/*
 * Undo the non-devm initialisation done in create_qdev(): the SRCU structs,
 * the PCI driver data pointer and the two workqueues.
 * @qdev: device to clean up.
 *
 * NOTE(review): no caller of this function is visible in this listing.
 */
302 static void cleanup_qdev(struct qaic_device *qdev)
/* Per-DBC channel SRCU structs first, then the device-wide one. */
306 for (i = 0; i < qdev->num_dbc; ++i)
307 cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
308 cleanup_srcu_struct(&qdev->dev_lock);
309 pci_set_drvdata(qdev->pdev, NULL);
310 destroy_workqueue(qdev->cntl_wq);
311 destroy_workqueue(qdev->qts_wq);
/*
 * Allocate and initialise the qaic_device and its DRM device for a PCI device.
 * @pdev: PCI device being probed; owner of the devm allocations.
 * @id:   matched PCI id; id->device is compared with PCI_DEV_AIC100.
 *
 * NOTE(review): allocation-failure checks, the body of the device-id branch
 * (including wherever num_dbc is set), some per-DBC initialisation and the
 * return statements are missing from this listing.
 */
314 static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
316 struct qaic_drm_device *qddev;
317 struct qaic_device *qdev;
/* Zeroed, device-managed allocation; the device starts in the OFFLINE state. */
320 qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL);
324 qdev->dev_state = QAIC_OFFLINE;
325 if (id->device == PCI_DEV_AIC100) {
/* Device-managed array with one entry per DBC. */
327 qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
/* Two unbound workqueues, stored as cntl_wq and qts_wq. */
332 qdev->cntl_wq = alloc_workqueue("qaic_cntl", WQ_UNBOUND, 0);
336 qdev->qts_wq = alloc_workqueue("qaic_ts", WQ_UNBOUND, 0);
/* Presumably runs only when the qts_wq allocation failed; the condition is not visible -- TODO confirm. */
338 destroy_workqueue(qdev->cntl_wq);
/* Make the qaic_device reachable via pci_get_drvdata(), as used by the other PCI callbacks. */
342 pci_set_drvdata(pdev, qdev);
345 mutex_init(&qdev->cntl_mutex);
346 INIT_LIST_HEAD(&qdev->cntl_xfer_list);
347 init_srcu_struct(&qdev->dev_lock);
/* Initialise each DBC's lock, back-pointer, lists, SRCU struct and release wait queue. */
349 for (i = 0; i < qdev->num_dbc; ++i) {
350 spin_lock_init(&qdev->dbc[i].xfer_lock);
351 qdev->dbc[i].qdev = qdev;
353 INIT_LIST_HEAD(&qdev->dbc[i].xfer_list);
354 init_srcu_struct(&qdev->dbc[i].ch_lock);
355 init_waitqueue_head(&qdev->dbc[i].dbc_release);
356 INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
/* Device-managed DRM device embedding struct drm_device as member "drm". */
359 qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
/* users_mutex is DRM-managed; the user list is filled by qaic_open(). */
365 drmm_mutex_init(to_drm(qddev), &qddev->users_mutex);
366 INIT_LIST_HEAD(&qddev->users);
/*
 * PCI-level setup: check the BAR layout, enable the device, configure DMA
 * and map BAR 0 and BAR 2.
 * @qdev: receives the bar_0 and bar_2 mappings.
 * @pdev: PCI device being set up.
 *
 * The visible return statements yield PTR_ERR() of a failed BAR mapping.
 * NOTE(review): the other error checks and the final return are missing from
 * this listing.
 */
373 static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
/* Bitmask of the device's memory BARs. */
378 bars = pci_select_bars(pdev, IORESOURCE_MEM);
380 /* make sure the device has the expected BARs */
381 if (bars != (BIT(0) | BIT(2) | BIT(4))) {
382 pci_dbg(pdev, "%s: expected BARs 0, 2, and 4 not found in device. Found 0x%x\n",
387 ret = pcim_enable_device(pdev);
/* 64-bit streaming and coherent DMA masks, and a maximum segment size of UINT_MAX. */
391 ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
394 ret = dma_set_max_seg_size(&pdev->dev, UINT_MAX);
/* Device-managed mappings; bar_0 is later passed to qaic_mhi_register_controller(), bar_2 is the base for the DBC registers. */
398 qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]);
399 if (IS_ERR(qdev->bar_0))
400 return PTR_ERR(qdev->bar_0);
402 qdev->bar_2 = devm_ioremap_resource(&pdev->dev, &pdev->resource[2]);
403 if (IS_ERR(qdev->bar_2))
404 return PTR_ERR(qdev->bar_2);
406 /* Managed release since we use pcim_enable_device above */
407 pci_set_master(pdev);
/*
 * Allocate MSI vectors and request one threaded interrupt per DBC.
 * @qdev: device whose DBCs get interrupt handlers; single_msi is set here
 *        when only one vector could be allocated.
 * @pdev: PCI device providing the vectors.
 *
 * qaic_pci_probe() stores this function's return value as mhi_irq.
 * NOTE(review): error handling, the delimiters of the block comment below
 * and the return statements are missing from this listing.
 */
412 static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
418 /* Managed release since we use pcim_enable_device */
/* Ask for exactly 32 MSI vectors; on -ENOSPC retry with exactly one. */
419 ret = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI);
420 if (ret == -ENOSPC) {
421 ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
426 * Operate in one MSI mode. All interrupts will be directed to
427 * MSI0; every interrupt will wake up all the interrupt handlers
428 * (MHI and DBC[0-15]). Since the interrupt is now shared, it is
429 * not disabled during DBC threaded handler, but only one thread
430 * will be allowed to run per DBC, so while it can be
431 * interrupted, it shouldn't race with itself.
433 qdev->single_msi = true;
434 pci_info(pdev, "Allocating 32 MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n");
435 } else if (ret < 0) {
/* Vector 0 serves MHI. */
439 mhi_irq = pci_irq_vector(pdev, 0);
/*
 * DBC i uses vector i + 1, or vector 0 in single-MSI mode. The handlers are
 * registered IRQF_SHARED with the DBC itself as the cookie.
 */
443 for (i = 0; i < qdev->num_dbc; ++i) {
444 ret = devm_request_threaded_irq(&pdev->dev,
445 pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1),
446 dbc_irq_handler, dbc_irq_threaded_fn, IRQF_SHARED,
447 "qaic_dbc", &qdev->dbc[i]);
/*
 * Polling mode: remember the DBC's irq number and set up its polling work.
 * The interrupt is disabled only when it is not the shared single MSI.
 */
451 if (datapath_polling) {
452 qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1);
453 if (!qdev->single_msi)
454 disable_irq_nosync(qdev->dbc[i].irq);
455 INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
/*
 * PCI driver .probe callback (see qaic_pci_driver).
 * @pdev: PCI device being bound.
 * @id:   matched entry of qaic_ids; forwarded to create_qdev().
 *
 * Visible order: create_qdev(), init_pci(), DBC register bases, init_msi(),
 * DRM device registration, MHI controller registration.
 * NOTE(review): most error checks, the remaining arguments of the MHI
 * registration call, the labels and the return statements are missing from
 * this listing.
 */
462 static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
464 struct qaic_device *qdev;
469 qdev = create_qdev(pdev, id);
473 ret = init_pci(qdev, pdev);
/* Each DBC's register window is at QAIC_DBC_OFF(i) from the BAR 2 mapping. */
477 for (i = 0; i < qdev->num_dbc; ++i)
478 qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i);
480 mhi_irq = init_msi(qdev, pdev);
486 ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
/* The MHI controller uses the BAR 0 mapping and the interrupt returned by init_msi(). */
490 qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
492 if (IS_ERR(qdev->mhi_cntrl)) {
493 ret = PTR_ERR(qdev->mhi_cntrl);
494 goto cleanup_drm_dev;
/* Presumably the cleanup_drm_dev unwind step; the label itself is not visible -- TODO confirm. */
500 qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
/*
 * PCI driver .remove callback (see qaic_pci_driver); also called directly by
 * qaic_pci_shutdown().
 * @pdev: PCI device being unbound.
 *
 * NOTE(review): link_up is not declared in the visible part of this file,
 * and some lines of this function are missing from the listing.
 */
506 static void qaic_pci_remove(struct pci_dev *pdev)
508 struct qaic_device *qdev = pci_get_drvdata(pdev);
/* Take the device offline and release the DBCs, unregister the DRM device, then free the MHI controller. */
513 qaic_dev_reset_clean_local_state(qdev);
514 qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
515 qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
/*
 * PCI driver .shutdown callback (see qaic_pci_driver): reuses the remove path.
 * @pdev: PCI device being shut down.
 *
 * NOTE(review): the statement the comment below refers to is missing from
 * this listing.
 */
519 static void qaic_pci_shutdown(struct pci_dev *pdev)
521 /* see qaic_exit for what link_up is doing */
523 qaic_pci_remove(pdev);
/*
 * PCI error handler .error_detected callback (see qaic_pci_err_handler).
 * Neither argument is examined; the visible statement always requests a
 * reset.
 */
526 static pci_ers_result_t qaic_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t error)
528 return PCI_ERS_RESULT_NEED_RESET;
/*
 * PCI error handler .reset_prepare callback (see qaic_pci_err_handler):
 * takes the device offline, starts the MHI reset and releases local state.
 * @pdev: PCI device about to be reset.
 *
 * NOTE(review): qaic_dev_reset_clean_local_state() itself calls
 * qaic_notify_reset(), so the notification runs twice on this path --
 * confirm whether the second pass is intended.
 */
531 static void qaic_pci_reset_prepare(struct pci_dev *pdev)
533 struct qaic_device *qdev = pci_get_drvdata(pdev);
535 qaic_notify_reset(qdev);
536 qaic_mhi_start_reset(qdev->mhi_cntrl);
537 qaic_dev_reset_clean_local_state(qdev);
/*
 * PCI error handler .reset_done callback (see qaic_pci_err_handler): tells
 * the MHI controller that the reset has completed.
 * @pdev: PCI device that was reset.
 */
540 static void qaic_pci_reset_done(struct pci_dev *pdev)
542 struct qaic_device *qdev = pci_get_drvdata(pdev);
544 qaic_mhi_reset_done(qdev->mhi_cntrl);
/* MHI channels this driver binds to; used as qaic_mhi_driver.id_table. */
547 static const struct mhi_device_id qaic_mhi_match_table[] = {
548 { .chan = "QAIC_CONTROL", },
/*
 * MHI driver registered in qaic_init() and unregistered in qaic_exit().
 * The transfer callbacks are defined outside this file section.
 */
552 static struct mhi_driver qaic_mhi_driver = {
553 .id_table = qaic_mhi_match_table,
554 .remove = qaic_mhi_remove,
555 .probe = qaic_mhi_probe,
556 .ul_xfer_cb = qaic_mhi_ul_xfer_cb,
557 .dl_xfer_cb = qaic_mhi_dl_xfer_cb,
/* PCI ids handled by this driver: the Qualcomm vendor id with device id PCI_DEV_AIC100. */
563 static const struct pci_device_id qaic_ids[] = {
564 { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), },
/* Export the table as the module's PCI device table. */
567 MODULE_DEVICE_TABLE(pci, qaic_ids);
/* PCI error-recovery callbacks; installed through qaic_pci_driver.err_handler. */
569 static const struct pci_error_handlers qaic_pci_err_handler = {
570 .error_detected = qaic_pci_error_detected,
571 .reset_prepare = qaic_pci_reset_prepare,
572 .reset_done = qaic_pci_reset_done,
/*
 * PCI driver registered in qaic_init() and unregistered in qaic_exit().
 * NOTE(review): at least one initializer line is missing from this listing.
 */
575 static struct pci_driver qaic_pci_driver = {
577 .id_table = qaic_ids,
578 .probe = qaic_pci_probe,
579 .remove = qaic_pci_remove,
580 .shutdown = qaic_pci_shutdown,
581 .err_handler = &qaic_pci_err_handler,
/*
 * Module init: registers the PCI driver, then the MHI driver, then
 * initialises timesync. Each failure is logged at debug level.
 *
 * NOTE(review): the error checks, labels and return statements are missing
 * from this listing, so the full unwind order cannot be confirmed from here.
 */
584 static int __init qaic_init(void)
588 ret = pci_register_driver(&qaic_pci_driver);
590 pr_debug("qaic: pci_register_driver failed %d\n", ret);
594 ret = mhi_driver_register(&qaic_mhi_driver);
596 pr_debug("qaic: mhi_driver_register failed %d\n", ret);
600 ret = qaic_timesync_init();
602 pr_debug("qaic: qaic_timesync_init failed %d\n", ret);
/* Presumably part of the error unwind for a failure after PCI registration -- TODO confirm. */
607 pci_unregister_driver(&qaic_pci_driver);
/*
 * Module exit: undoes qaic_init() in reverse order -- timesync, then the MHI
 * driver, then the PCI driver.
 *
 * NOTE(review): the delimiters of the block comment below and the statement
 * it describes (setting link_up) are missing from this listing.
 */
611 static void __exit qaic_exit(void)
614 * We assume that qaic_pci_remove() is called due to a hotplug event
615 * which would mean that the link is down, and thus
616 * qaic_mhi_free_controller() should not try to access the device during
618 * We call pci_unregister_driver() below, which also triggers
619 * qaic_pci_remove(), but since this is module exit, we expect the link
620 * to the device to be up, in which case qaic_mhi_free_controller()
621 * should try to access the device during cleanup to put the device in
623 * For that reason, we set link_up here to let qaic_mhi_free_controller
624 * know the expected link state. Since the module is going to be
625 * removed at the end of this, we don't need to worry about
626 * reinitializing the link_up state after the cleanup is done.
629 qaic_timesync_deinit();
630 mhi_driver_unregister(&qaic_mhi_driver);
631 pci_unregister_driver(&qaic_pci_driver);
/* Module entry and exit points. */
634 module_init(qaic_init);
635 module_exit(qaic_exit);
/* Module metadata; the author and description strings are built from QAIC_DESC. */
637 MODULE_AUTHOR(QAIC_DESC " Kernel Driver Team");
638 MODULE_DESCRIPTION(QAIC_DESC " Accel Driver");
639 MODULE_LICENSE("GPL");