1 // SPDX-License-Identifier: GPL-2.0
/* Copyright 2020-2021 NXP */
6 #include <linux/init.h>
7 #include <linux/interconnect.h>
8 #include <linux/ioctl.h>
9 #include <linux/list.h>
10 #include <linux/kernel.h>
11 #include <linux/module.h>
13 #include <linux/of_address.h>
14 #include <linux/platform_device.h>
15 #include <linux/slab.h>
16 #include <linux/types.h>
17 #include <linux/pm_runtime.h>
18 #include <linux/pm_domain.h>
19 #include <linux/firmware.h>
20 #include <linux/vmalloc.h>
29 void csr_writel(struct vpu_core *core, u32 reg, u32 val)
31 writel(val, core->base + reg);
34 u32 csr_readl(struct vpu_core *core, u32 reg)
36 return readl(core->base + reg);
/*
 * Load the VPU firmware image into the core's pre-reserved firmware buffer
 * and notify the interface layer.
 * NOTE(review): several lines (declarations, braces, error returns) are
 * elided from this view; comments describe only the visible code.
 */
39 static int vpu_core_load_firmware(struct vpu_core *core)
41 const struct firmware *pfw = NULL;
/* firmware buffer comes from the "memory-region" boot area mapped at probe */
45 dev_err(core->dev, "firmware buffer is not ready\n");
49 ret = reject_firmware(&pfw, core->res->fwname, core->dev);
50 dev_dbg(core->dev, "request_firmware %s : %d\n", core->res->fwname, ret);
52 dev_err(core->dev, "request firmware %s failed, ret = %d\n",
53 core->res->fwname, ret);
/* refuse an image that does not fit the reserved region */
57 if (core->fw.length < pfw->size) {
58 dev_err(core->dev, "firmware buffer size want %zu, but %d\n",
59 pfw->size, core->fw.length);
/* clear the whole region, then copy the image and record its size */
64 memset(core->fw.virt, 0, core->fw.length);
65 memcpy(core->fw.virt, pfw->data, pfw->size);
66 core->fw.bytesused = pfw->size;
67 ret = vpu_iface_on_firmware_loaded(core);
/* firmware object is released in all cases once copied */
69 release_firmware(pfw);
/*
 * Finish core bring-up after the firmware reports ready: read the firmware
 * version, derive the supported instance count, and mark the core ACTIVE.
 * NOTE(review): some lines are elided from this view.
 */
75 static int vpu_core_boot_done(struct vpu_core *core)
79 fw_version = vpu_iface_get_version(core);
80 dev_info(core->dev, "%s firmware version : %d.%d.%d\n",
81 vpu_core_type_desc(core->type),
82 (fw_version >> 16) & 0xff,
83 (fw_version >> 8) & 0xff,
85 core->supported_instance_count = vpu_iface_get_max_instance_count(core);
/* each instance needs res->act_size bytes of the "act" region; clamp */
86 if (core->res->act_size) {
87 u32 count = core->act.length / core->res->act_size;
89 core->supported_instance_count = min(core->supported_instance_count, count);
/* instance_mask is a bitmap; never allow more slots than it has bits */
91 if (core->supported_instance_count >= BITS_PER_TYPE(core->instance_mask))
92 core->supported_instance_count = BITS_PER_TYPE(core->instance_mask);
93 core->fw_version = fw_version;
94 vpu_core_set_state(core, VPU_CORE_ACTIVE);
/*
 * Wait (up to VPU_TIMEOUT) for the firmware boot completion signalled by
 * the message handler, then finalize bring-up via vpu_core_boot_done().
 * NOTE(review): the timeout return path is elided from this view.
 */
99 static int vpu_core_wait_boot_done(struct vpu_core *core)
103 ret = wait_for_completion_timeout(&core->cmp, VPU_TIMEOUT);
105 dev_err(core->dev, "boot timeout\n");
108 return vpu_core_boot_done(core);
/*
 * Boot the core: optionally (re)load firmware (@load), kick the core via
 * the interface layer, and wait for boot completion.
 * NOTE(review): the @load conditional and error returns are elided.
 */
111 static int vpu_core_boot(struct vpu_core *core, bool load)
/* re-arm the completion before triggering boot to avoid a stale wakeup */
115 reinit_completion(&core->cmp);
117 ret = vpu_core_load_firmware(core);
122 vpu_iface_boot_core(core);
123 return vpu_core_wait_boot_done(core);
/* Ask the interface layer to shut the core down; propagate its status. */
static int vpu_core_shutdown(struct vpu_core *core)
{
	int ret;

	ret = vpu_iface_shutdown_core(core);
	return ret;
}
/*
 * Restore a core whose hardware is already powered: software-reset it,
 * re-run the boot-done bookkeeping, and restore interface state.
 * NOTE(review): the error check after sw_reset is elided from this view.
 */
131 static int vpu_core_restore(struct vpu_core *core)
135 ret = vpu_core_sw_reset(core);
139 vpu_core_boot_done(core);
140 return vpu_iface_restore_core(core);
/*
 * Allocate a coherent DMA buffer of buf->length for @dev.
 * GFP_DMA32 keeps the buffer below 4GiB — presumably a hardware
 * addressing limit of the VPU; confirm against the SoC datasheet.
 * NOTE(review): result checks and bookkeeping lines are elided.
 */
143 static int __vpu_alloc_dma(struct device *dev, struct vpu_buffer *buf)
145 gfp_t gfp = GFP_KERNEL | GFP_DMA32;
150 buf->virt = dma_alloc_coherent(dev, buf->length, &buf->phys, gfp);
/*
 * Free a buffer previously allocated by __vpu_alloc_dma(); a no-op when
 * the buffer was never allocated (no virt) or has no owning device.
 * NOTE(review): the early-return body and field clearing are elided.
 */
159 void vpu_free_dma(struct vpu_buffer *buf)
161 if (!buf->virt || !buf->dev)
164 dma_free_coherent(buf->dev, buf->length, buf->virt, buf->phys);
172 int vpu_alloc_dma(struct vpu_core *core, struct vpu_buffer *buf)
174 return __vpu_alloc_dma(core->dev, buf);
/*
 * Transition the core state machine, tracing every actual change.
 * NOTE(review): the state assignment and the DEINIT-specific action are
 * elided from this view.
 */
177 void vpu_core_set_state(struct vpu_core *core, enum vpu_core_state state)
179 if (state != core->state)
180 vpu_trace(core->dev, "vpu core state change from %d to %d\n", core->state, state);
182 if (core->state == VPU_CORE_DEINIT)
/*
 * Re-derive the core state from hardware power state and hang bookkeeping:
 * powered off with outstanding requests -> HANG, powered off otherwise ->
 * DEINIT, powered on but with hung instances -> HANG.
 * Caller is expected to hold core->lock (callers in this file lock it).
 */
186 static void vpu_core_update_state(struct vpu_core *core)
188 if (!vpu_iface_get_power_state(core)) {
189 if (core->request_count)
190 vpu_core_set_state(core, VPU_CORE_HANG);
192 vpu_core_set_state(core, VPU_CORE_DEINIT);
194 } else if (core->state == VPU_CORE_ACTIVE && core->hang_mask) {
195 vpu_core_set_state(core, VPU_CORE_HANG);
/*
 * Pick the least-loaded usable core of @type: scan vpu->cores, refresh each
 * candidate's state, skip non-ACTIVE cores (DEINIT gets special handling,
 * elided here), and keep the one with the smallest request_count.
 * NOTE(review): the type filter and core selection assignment are elided.
 */
199 static struct vpu_core *vpu_core_find_proper_by_type(struct vpu_dev *vpu, u32 type)
201 struct vpu_core *core = NULL;
/* start above any real count so the first ACTIVE candidate wins */
202 int request_count = INT_MAX;
205 list_for_each_entry(c, &vpu->cores, list) {
206 dev_dbg(c->dev, "instance_mask = 0x%lx, state = %d\n", c->instance_mask, c->state);
209 mutex_lock(&c->lock);
210 vpu_core_update_state(c);
211 mutex_unlock(&c->lock);
212 if (c->state == VPU_CORE_DEINIT) {
216 if (c->state != VPU_CORE_ACTIVE)
218 if (c->request_count < request_count) {
219 request_count = c->request_count;
/*
 * Check whether @core is already linked into vpu->cores.
 * NOTE(review): the comparison and return statements are elided.
 */
229 static bool vpu_core_is_exist(struct vpu_dev *vpu, struct vpu_core *core)
233 list_for_each_entry(c, &vpu->cores, list) {
241 static void vpu_core_get_vpu(struct vpu_core *core)
243 core->vpu->get_vpu(core->vpu);
244 if (core->type == VPU_CORE_TYPE_ENC)
245 core->vpu->get_enc(core->vpu);
246 if (core->type == VPU_CORE_TYPE_DEC)
247 core->vpu->get_dec(core->vpu);
/*
 * Register @core with the parent vpu device: allocate its ordered message
 * workqueue and kfifo backing buffer, link it into vpu->cores, and take
 * the vpu references. The trailing lines are the goto-style error
 * unwinding (free buffer, destroy workqueue).
 * NOTE(review): returns, labels and some braces are elided from this view.
 */
250 static int vpu_core_register(struct device *dev, struct vpu_core *core)
252 struct vpu_dev *vpu = dev_get_drvdata(dev);
255 dev_dbg(core->dev, "register core %s\n", vpu_core_type_desc(core->type));
/* a core already on the list must not be registered twice */
256 if (vpu_core_is_exist(vpu, core))
/* ordered workqueue: messages for one core are processed serially */
259 core->workqueue = alloc_ordered_workqueue("vpu", WQ_MEM_RECLAIM);
260 if (!core->workqueue) {
261 dev_err(core->dev, "fail to alloc workqueue\n");
264 INIT_WORK(&core->msg_work, vpu_msg_run_work);
265 INIT_DELAYED_WORK(&core->msg_delayed_work, vpu_msg_delayed_work);
/* kfifo requires a power-of-two buffer size */
266 core->msg_buffer_size = roundup_pow_of_two(VPU_MSG_BUFFER_SIZE);
267 core->msg_buffer = vzalloc(core->msg_buffer_size);
268 if (!core->msg_buffer) {
269 dev_err(core->dev, "failed allocate buffer for fifo\n");
273 ret = kfifo_init(&core->msg_fifo, core->msg_buffer, core->msg_buffer_size);
275 dev_err(core->dev, "failed init kfifo\n");
279 list_add_tail(&core->list, &vpu->cores);
280 vpu_core_get_vpu(core);
/* error unwinding: release in reverse order of acquisition */
284 if (core->msg_buffer) {
285 vfree(core->msg_buffer);
286 core->msg_buffer = NULL;
288 if (core->workqueue) {
289 destroy_workqueue(core->workqueue);
290 core->workqueue = NULL;
295 static void vpu_core_put_vpu(struct vpu_core *core)
297 if (core->type == VPU_CORE_TYPE_ENC)
298 core->vpu->put_enc(core->vpu);
299 if (core->type == VPU_CORE_TYPE_DEC)
300 core->vpu->put_dec(core->vpu);
301 core->vpu->put_vpu(core->vpu);
/*
 * Undo vpu_core_register(): unlink the core, drop vpu references, free the
 * message buffer, and flush + destroy the workqueue.
 * NOTE(review): the return statement is elided from this view.
 */
304 static int vpu_core_unregister(struct device *dev, struct vpu_core *core)
306 list_del_init(&core->list);
308 vpu_core_put_vpu(core);
310 vfree(core->msg_buffer);
311 core->msg_buffer = NULL;
/* cancel pending work before destroying its queue */
313 if (core->workqueue) {
314 cancel_work_sync(&core->msg_work);
315 cancel_delayed_work_sync(&core->msg_delayed_work);
316 destroy_workqueue(core->workqueue);
317 core->workqueue = NULL;
/*
 * Claim the lowest free instance slot: ffz() finds the first zero bit of
 * instance_mask; fail if it exceeds the supported count, otherwise mark
 * it used. Returns the slot id (error return is elided from this view).
 */
323 static int vpu_core_acquire_instance(struct vpu_core *core)
327 id = ffz(core->instance_mask);
328 if (id >= core->supported_instance_count)
331 set_bit(id, &core->instance_mask);
336 static void vpu_core_release_instance(struct vpu_core *core, int id)
338 if (id < 0 || id >= core->supported_instance_count)
341 clear_bit(id, &core->instance_mask);
/*
 * Take a reference on @inst and return it.
 * NOTE(review): the NULL check and return are elided from this view.
 */
344 struct vpu_inst *vpu_inst_get(struct vpu_inst *inst)
349 atomic_inc(&inst->ref_count);
/*
 * Drop a reference on @inst; the last put triggers release (release body
 * elided from this view).
 */
354 void vpu_inst_put(struct vpu_inst *inst)
358 if (atomic_dec_and_test(&inst->ref_count)) {
/*
 * Reserve a usable core of @type: pick the least-loaded core, runtime-
 * resume it, boot or restore it if it is still DEINIT, and bump its
 * request_count. Returns the chosen core (NULL path elided).
 * Locking: takes vpu->lock for the list scan and core->lock for the
 * per-core bring-up.
 */
364 struct vpu_core *vpu_request_core(struct vpu_dev *vpu, enum vpu_core_type type)
366 struct vpu_core *core = NULL;
369 mutex_lock(&vpu->lock);
371 core = vpu_core_find_proper_by_type(vpu, type);
375 mutex_lock(&core->lock);
376 pm_runtime_resume_and_get(core->dev);
378 if (core->state == VPU_CORE_DEINIT) {
/* hardware already powered -> restore; otherwise full (re)boot */
379 if (vpu_iface_get_power_state(core))
380 ret = vpu_core_restore(core);
382 ret = vpu_core_boot(core, true);
/* boot/restore failure path: drop the runtime PM ref (rest elided) */
384 pm_runtime_put_sync(core->dev);
385 mutex_unlock(&core->lock);
391 core->request_count++;
393 mutex_unlock(&core->lock);
395 mutex_unlock(&vpu->lock);
/*
 * Release a core obtained from vpu_request_core(): drop the runtime PM
 * reference and decrement request_count under core->lock.
 * NOTE(review): the NULL guard is elided from this view.
 */
400 void vpu_release_core(struct vpu_core *core)
405 mutex_lock(&core->lock);
406 pm_runtime_put_sync(core->dev);
/* guard against underflow if release is called more often than request */
407 if (core->request_count)
408 core->request_count--;
409 mutex_unlock(&core->lock);
/*
 * Bind @inst to a core of matching type: request a core, acquire an
 * instance slot, link the instance into the core's list, carve out its
 * slice of the shared "act" region, and create its debugfs file.
 * NOTE(review): error paths, unlock and returns are elided from this view.
 */
412 int vpu_inst_register(struct vpu_inst *inst)
415 struct vpu_core *core;
421 core = vpu_request_core(vpu, inst->type);
423 dev_err(vpu->dev, "there is no vpu core for %s\n",
424 vpu_core_type_desc(inst->type));
/* instance keeps its own reference on the core's device */
428 inst->dev = get_device(core->dev);
431 mutex_lock(&core->lock);
432 if (core->state != VPU_CORE_ACTIVE) {
433 dev_err(core->dev, "vpu core is not active, state = %d\n", core->state);
/* already registered with a valid id -> nothing to do (path elided) */
438 if (inst->id >= 0 && inst->id < core->supported_instance_count)
441 ret = vpu_core_acquire_instance(core);
445 vpu_trace(inst->dev, "[%d] %p\n", ret, inst);
447 list_add_tail(&inst->list, &core->instances);
/* per-instance slice of the core's act buffer, indexed by slot id */
449 if (core->res->act_size) {
450 inst->act.phys = core->act.phys + core->res->act_size * inst->id;
451 inst->act.virt = core->act.virt + core->res->act_size * inst->id;
452 inst->act.length = core->res->act_size;
454 vpu_inst_create_dbgfs_file(inst);
456 mutex_unlock(&core->lock);
459 dev_err(core->dev, "register instance fail\n");
/*
 * Detach @inst from its core: clear pending requests, remove debugfs and
 * the list entry, free the instance slot, then refresh the core state. If
 * the core hung and is now idle, software-reset it (dropping core->lock
 * around the reset) and mark it ACTIVE again on success.
 * NOTE(review): declarations, returns and some branches are elided.
 */
463 int vpu_inst_unregister(struct vpu_inst *inst)
465 struct vpu_core *core;
471 vpu_clear_request(inst);
472 mutex_lock(&core->lock);
473 if (inst->id >= 0 && inst->id < core->supported_instance_count) {
474 vpu_inst_remove_dbgfs_file(inst);
475 list_del_init(&inst->list);
476 vpu_core_release_instance(core, inst->id);
477 inst->id = VPU_INST_NULL_ID;
479 vpu_core_update_state(core);
/* last instance gone from a hung core: try to recover it */
480 if (core->state == VPU_CORE_HANG && !core->instance_mask) {
483 dev_info(core->dev, "reset hang core\n");
/* sw_reset may sleep/wait on messages; must not hold core->lock */
484 mutex_unlock(&core->lock);
485 err = vpu_core_sw_reset(core);
486 mutex_lock(&core->lock);
488 vpu_core_set_state(core, VPU_CORE_ACTIVE);
492 mutex_unlock(&core->lock);
/*
 * Look up the instance occupying slot @index on @core and return it with
 * a reference taken (vpu_inst_get), or NULL when the slot is out of range
 * or unused. The early-exit label/return lines are elided from this view.
 */
497 struct vpu_inst *vpu_core_find_instance(struct vpu_core *core, u32 index)
499 struct vpu_inst *inst = NULL;
500 struct vpu_inst *tmp;
502 mutex_lock(&core->lock);
/* slot must be both in range and currently allocated */
503 if (index >= core->supported_instance_count || !test_bit(index, &core->instance_mask))
505 list_for_each_entry(tmp, &core->instances, list) {
506 if (tmp->id == index) {
507 inst = vpu_inst_get(tmp);
512 mutex_unlock(&core->lock);
/*
 * Resolve the resource descriptor for @inst: use the bound core's
 * resources when available, otherwise scan vpu->cores for a core of the
 * same type. NOTE(review): the match assignment and final return are
 * elided from this view.
 */
517 const struct vpu_core_resources *vpu_get_resource(struct vpu_inst *inst)
520 struct vpu_core *core = NULL;
521 const struct vpu_core_resources *res = NULL;
523 if (!inst || !inst->vpu)
/* fast path: instance already bound to a core */
526 if (inst->core && inst->core->res)
527 return inst->core->res;
530 mutex_lock(&vpu->lock);
531 list_for_each_entry(core, &vpu->cores, list) {
532 if (core->type == inst->type) {
537 mutex_unlock(&vpu->lock);
/*
 * Parse the core's device-tree node: resolve the two mandatory
 * "memory-region" phandles (boot/firmware region and rpc region), map
 * both, verify the rpc region is large enough and uncached, then split
 * the rpc region into rpc + fwlog ("log") + per-instance "act" areas.
 * NOTE(review): error returns, node puts and some braces are elided.
 */
542 static int vpu_core_parse_dt(struct vpu_core *core, struct device_node *np)
544 struct device_node *node;
548 if (of_count_phandle_with_args(np, "memory-region", NULL) < 2) {
549 dev_err(core->dev, "need 2 memory-region for boot and rpc\n");
/* region 0: firmware boot area */
553 node = of_parse_phandle(np, "memory-region", 0);
555 dev_err(core->dev, "boot-region of_parse_phandle error\n");
558 if (of_address_to_resource(node, 0, &res)) {
559 dev_err(core->dev, "boot-region of_address_to_resource error\n");
563 core->fw.phys = res.start;
564 core->fw.length = resource_size(&res);
/* region 1: rpc shared-memory area */
568 node = of_parse_phandle(np, "memory-region", 1);
570 dev_err(core->dev, "rpc-region of_parse_phandle error\n");
573 if (of_address_to_resource(node, 0, &res)) {
574 dev_err(core->dev, "rpc-region of_address_to_resource error\n");
578 core->rpc.phys = res.start;
579 core->rpc.length = resource_size(&res);
/* rpc region must hold both the rpc block and the firmware log */
581 if (core->rpc.length < core->res->rpc_size + core->res->fwlog_size) {
582 dev_err(core->dev, "the rpc-region <%pad, 0x%x> is not enough\n",
583 &core->rpc.phys, core->rpc.length);
/* write-combined mappings; firmware and driver share these regions */
588 core->fw.virt = memremap(core->fw.phys, core->fw.length, MEMREMAP_WC);
589 core->rpc.virt = memremap(core->rpc.phys, core->rpc.length, MEMREMAP_WC);
590 memset(core->rpc.virt, 0, core->rpc.length);
592 ret = vpu_iface_check_memory_region(core, core->rpc.phys, core->rpc.length);
593 if (ret != VPU_CORE_MEMORY_UNCACHED) {
594 dev_err(core->dev, "rpc region<%pad, 0x%x> isn't uncached\n",
595 &core->rpc.phys, core->rpc.length);
/* layout: [rpc | log (fwlog_size) | act (remainder)] */
600 core->log.phys = core->rpc.phys + core->res->rpc_size;
601 core->log.virt = core->rpc.virt + core->res->rpc_size;
602 core->log.length = core->res->fwlog_size;
603 core->act.phys = core->log.phys + core->log.length;
604 core->act.virt = core->log.virt + core->log.length;
605 core->act.length = core->rpc.length - core->res->rpc_size - core->log.length;
606 core->rpc.length = core->res->rpc_size;
/*
 * Platform probe for one VPU core: allocate and initialize the core
 * structure, parse DT resources, map registers, set up the mailbox and
 * shared interface, enable runtime PM, and register the core with the
 * parent vpu device.
 * NOTE(review): many error returns, labels and braces are elided from
 * this view; comments describe only the visible code.
 */
613 static int vpu_core_probe(struct platform_device *pdev)
615 struct device *dev = &pdev->dev;
616 struct vpu_core *core;
/* parent device (the amphion vpu aggregate) owns the shared state */
617 struct vpu_dev *vpu = dev_get_drvdata(dev->parent);
618 struct vpu_shared_addr *iface;
622 dev_dbg(dev, "probe\n");
625 core = devm_kzalloc(dev, sizeof(*core), GFP_KERNEL);
631 platform_set_drvdata(pdev, core);
633 INIT_LIST_HEAD(&core->instances);
634 mutex_init(&core->lock);
635 mutex_init(&core->cmd_lock);
636 init_completion(&core->cmp);
637 init_waitqueue_head(&core->ack_wq);
638 vpu_core_set_state(core, VPU_CORE_DEINIT);
/* match data selects encoder vs decoder resources */
640 core->res = of_device_get_match_data(dev);
644 core->type = core->res->type;
645 core->id = of_alias_get_id(dev->of_node, "vpu-core");
647 dev_err(dev, "can't get vpu core id\n");
650 dev_info(core->dev, "[%d] = %s\n", core->id, vpu_core_type_desc(core->type));
651 ret = vpu_core_parse_dt(core, dev->of_node);
655 core->base = devm_platform_ioremap_resource(pdev, 0);
656 if (IS_ERR(core->base))
657 return PTR_ERR(core->base);
659 if (!vpu_iface_check_codec(core)) {
660 dev_err(core->dev, "is not supported\n");
664 ret = vpu_mbox_init(core);
668 iface = devm_kzalloc(dev, sizeof(*iface), GFP_KERNEL);
/* optional interface-private area, size decided by the iface layer */
672 iface_data_size = vpu_iface_get_data_size(core);
673 if (iface_data_size) {
674 iface->priv = devm_kzalloc(dev, iface_data_size, GFP_KERNEL);
679 ret = vpu_iface_init(core, iface, &core->rpc, core->fw.phys);
681 dev_err(core->dev, "init iface fail, ret = %d\n", ret);
685 vpu_iface_config_system(core, vpu->res->mreg_base, vpu->base);
686 vpu_iface_set_log_buf(core, &core->log);
688 pm_runtime_enable(dev);
689 ret = pm_runtime_resume_and_get(dev);
691 pm_runtime_put_noidle(dev);
692 pm_runtime_set_suspended(dev);
693 goto err_runtime_disable;
696 ret = vpu_core_register(dev->parent, core);
698 goto err_core_register;
699 core->parent = dev->parent;
701 pm_runtime_put_sync(dev);
702 vpu_core_create_dbgfs_file(core);
/* error unwinding (labels elided): drop PM ref, disable runtime PM */
707 pm_runtime_put_sync(dev);
709 pm_runtime_disable(dev);
/*
 * Platform remove: tear down debugfs, power the core up long enough to
 * shut it down cleanly, disable runtime PM, unregister from the parent,
 * unmap the firmware/rpc regions and destroy the mutexes.
 * NOTE(review): the resume-error branch is elided from this view.
 */
714 static void vpu_core_remove(struct platform_device *pdev)
716 struct device *dev = &pdev->dev;
717 struct vpu_core *core = platform_get_drvdata(pdev);
720 vpu_core_remove_dbgfs_file(core);
721 ret = pm_runtime_resume_and_get(dev);
724 vpu_core_shutdown(core);
725 pm_runtime_put_sync(dev);
726 pm_runtime_disable(dev);
728 vpu_core_unregister(core->parent, core);
729 memunmap(core->fw.virt);
730 memunmap(core->rpc.virt);
731 mutex_destroy(&core->lock);
732 mutex_destroy(&core->cmd_lock);
735 static int __maybe_unused vpu_core_runtime_resume(struct device *dev)
737 struct vpu_core *core = dev_get_drvdata(dev);
739 return vpu_mbox_request(core);
/*
 * Runtime PM suspend counterpart of vpu_core_runtime_resume().
 * NOTE(review): the body (presumably releasing the mailbox and returning
 * 0) is elided from this view — confirm against the full source.
 */
742 static int __maybe_unused vpu_core_runtime_suspend(struct device *dev)
744 struct vpu_core *core = dev_get_drvdata(dev);
750 static void vpu_core_cancel_work(struct vpu_core *core)
752 struct vpu_inst *inst = NULL;
754 cancel_work_sync(&core->msg_work);
755 cancel_delayed_work_sync(&core->msg_delayed_work);
757 mutex_lock(&core->lock);
758 list_for_each_entry(inst, &core->instances, list)
759 cancel_work_sync(&inst->msg_work);
760 mutex_unlock(&core->lock);
763 static void vpu_core_resume_work(struct vpu_core *core)
765 struct vpu_inst *inst = NULL;
766 unsigned long delay = msecs_to_jiffies(10);
768 queue_work(core->workqueue, &core->msg_work);
769 queue_delayed_work(core->workqueue, &core->msg_delayed_work, delay);
771 mutex_lock(&core->lock);
772 list_for_each_entry(inst, &core->instances, list)
773 queue_work(inst->workqueue, &inst->msg_work);
774 mutex_unlock(&core->lock);
/*
 * System-sleep resume: re-take vpu references, and if the core was in use
 * (request_count) either boot it (hardware lost power) or software-reset
 * it; on failure mark the core HANG. Finally refresh state and restart
 * the message work.
 * NOTE(review): declarations and some branches/returns are elided.
 */
777 static int __maybe_unused vpu_core_resume(struct device *dev)
779 struct vpu_core *core = dev_get_drvdata(dev);
782 mutex_lock(&core->lock);
783 pm_runtime_resume_and_get(dev);
784 vpu_core_get_vpu(core);
786 if (core->request_count) {
/* power lost across suspend -> boot without reloading firmware */
787 if (!vpu_iface_get_power_state(core))
788 ret = vpu_core_boot(core, false);
790 ret = vpu_core_sw_reset(core);
792 dev_err(core->dev, "resume fail\n");
793 vpu_core_set_state(core, VPU_CORE_HANG);
796 vpu_core_update_state(core);
797 pm_runtime_put_sync(dev);
798 mutex_unlock(&core->lock);
800 vpu_core_resume_work(core);
/*
 * System-sleep suspend: snapshot firmware state if the core is in use,
 * cancel all message work, then drop the vpu references.
 * NOTE(review): the snapshot-error return and final return are elided.
 */
804 static int __maybe_unused vpu_core_suspend(struct device *dev)
806 struct vpu_core *core = dev_get_drvdata(dev);
809 mutex_lock(&core->lock);
810 if (core->request_count)
811 ret = vpu_core_snapshot(core);
812 mutex_unlock(&core->lock);
/* stop message processing before the hardware goes away */
816 vpu_core_cancel_work(core);
818 mutex_lock(&core->lock);
819 vpu_core_put_vpu(core);
820 mutex_unlock(&core->lock);
/* PM callbacks: runtime PM handles the mailbox; system sleep does full
 * snapshot/restore. (Closing brace of the initializer is elided here.)
 */
824 static const struct dev_pm_ops vpu_core_pm_ops = {
825 SET_RUNTIME_PM_OPS(vpu_core_runtime_suspend, vpu_core_runtime_resume, NULL)
826 SET_SYSTEM_SLEEP_PM_OPS(vpu_core_suspend, vpu_core_resume)
/* i.MX8Q encoder core resources; firmware name removed by deblobbing.
 * NOTE(review): several initializer fields are elided from this view.
 */
829 static struct vpu_core_resources imx8q_enc = {
830 .type = VPU_CORE_TYPE_ENC,
831 .fwname = "/*(DEBLOBBED)*/",
840 .fwlog_size = 0x80000,
/* i.MX8Q decoder core resources; firmware name removed by deblobbing.
 * NOTE(review): several initializer fields are elided from this view.
 */
844 static struct vpu_core_resources imx8q_dec = {
845 .type = VPU_CORE_TYPE_DEC,
846 .fwname = "/*(DEBLOBBED)*/",
855 .fwlog_size = 0x80000,
/* Device-tree match table mapping compatibles to the resource tables
 * above. (Sentinel entry and closing brace are elided from this view.)
 */
858 static const struct of_device_id vpu_core_dt_match[] = {
859 { .compatible = "nxp,imx8q-vpu-encoder", .data = &imx8q_enc },
860 { .compatible = "nxp,imx8q-vpu-decoder", .data = &imx8q_dec },
863 MODULE_DEVICE_TABLE(of, vpu_core_dt_match);
/* Platform driver for individual amphion VPU cores.
 * NOTE(review): the nested ".driver = {" line and closing braces are
 * elided from this view.
 */
865 static struct platform_driver amphion_vpu_core_driver = {
866 .probe = vpu_core_probe,
867 .remove_new = vpu_core_remove,
869 .name = "amphion-vpu-core",
870 .of_match_table = vpu_core_dt_match,
871 .pm = &vpu_core_pm_ops,
875 int __init vpu_core_driver_init(void)
877 return platform_driver_register(&hion_vpu_core_driver);
880 void __exit vpu_core_driver_exit(void)
882 platform_driver_unregister(&hion_vpu_core_driver);