// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */
18 #define pr_fmt(fmt) "arm-smmu: " fmt
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/of_iommu.h>
35 #include <linux/pci.h>
36 #include <linux/platform_device.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/ratelimit.h>
39 #include <linux/slab.h>
41 #include <linux/amba/bus.h>
42 #include <linux/fsl/mc.h>
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 */
53 #define QCOM_DUMMY_VAL -1
55 #define MSI_IOVA_BASE 0x8000000
56 #define MSI_IOVA_LENGTH 0x100000
58 static int force_stage;
59 module_param(force_stage, int, S_IRUGO);
60 MODULE_PARM_DESC(force_stage,
61 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
62 static bool disable_bypass =
63 IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
64 module_param(disable_bypass, bool, S_IRUGO);
65 MODULE_PARM_DESC(disable_bypass,
66 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
68 #define s2cr_init_val (struct arm_smmu_s2cr){ \
69 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
72 static bool using_legacy_binding, using_generic_binding;
74 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
76 if (pm_runtime_enabled(smmu->dev))
77 return pm_runtime_resume_and_get(smmu->dev);
82 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
84 if (pm_runtime_enabled(smmu->dev))
85 pm_runtime_put_autosuspend(smmu->dev);
88 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
90 return container_of(dom, struct arm_smmu_domain, domain);
93 static struct platform_driver arm_smmu_driver;
94 static struct iommu_ops arm_smmu_ops;
96 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
97 static int arm_smmu_bus_init(struct iommu_ops *ops);
99 static struct device_node *dev_get_dev_node(struct device *dev)
101 if (dev_is_pci(dev)) {
102 struct pci_bus *bus = to_pci_dev(dev)->bus;
104 while (!pci_is_root_bus(bus))
106 return of_node_get(bus->bridge->parent->of_node);
109 return of_node_get(dev->of_node);
112 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
114 *((__be32 *)data) = cpu_to_be32(alias);
115 return 0; /* Continue walking */
118 static int __find_legacy_master_phandle(struct device *dev, void *data)
120 struct of_phandle_iterator *it = *(void **)data;
121 struct device_node *np = it->node;
124 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
125 "#stream-id-cells", -1)
126 if (it->node == np) {
127 *(void **)data = dev;
131 return err == -ENOENT ? 0 : err;
134 static int arm_smmu_register_legacy_master(struct device *dev,
135 struct arm_smmu_device **smmu)
137 struct device *smmu_dev;
138 struct device_node *np;
139 struct of_phandle_iterator it;
145 np = dev_get_dev_node(dev);
146 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
152 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
153 __find_legacy_master_phandle);
161 if (dev_is_pci(dev)) {
162 /* "mmu-masters" assumes Stream ID == Requester ID */
163 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
169 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
174 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
178 *smmu = dev_get_drvdata(smmu_dev);
179 of_phandle_iterator_args(&it, sids, it.cur_count);
180 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
186 * With the legacy DT binding in play, we have no guarantees about
187 * probe order, but then we're also not doing default domains, so we can
188 * delay setting bus ops until we're sure every possible SMMU is ready,
189 * and that way ensure that no probe_device() calls get missed.
191 static int arm_smmu_legacy_bus_init(void)
193 if (using_legacy_binding)
194 return arm_smmu_bus_init(&arm_smmu_ops);
197 device_initcall_sync(arm_smmu_legacy_bus_init);
199 static int arm_smmu_register_legacy_master(struct device *dev,
200 struct arm_smmu_device **smmu)
204 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
206 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
211 /* Wait for any pending TLB invalidations to complete */
212 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
213 int sync, int status)
215 unsigned int spin_cnt, delay;
218 if (smmu->impl && unlikely(smmu->impl->tlb_sync))
219 return smmu->impl->tlb_sync(smmu, page, sync, status);
221 arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
222 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
223 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
224 reg = arm_smmu_readl(smmu, page, status);
225 if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
231 dev_err_ratelimited(smmu->dev,
232 "TLB sync timed out -- SMMU may be deadlocked\n");
235 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
239 spin_lock_irqsave(&smmu->global_sync_lock, flags);
240 __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
241 ARM_SMMU_GR0_sTLBGSTATUS);
242 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
245 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
247 struct arm_smmu_device *smmu = smmu_domain->smmu;
250 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
251 __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
252 ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
253 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
256 static void arm_smmu_tlb_inv_context_s1(void *cookie)
258 struct arm_smmu_domain *smmu_domain = cookie;
260 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
261 * current CPU are visible beforehand.
264 arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
265 ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
266 arm_smmu_tlb_sync_context(smmu_domain);
269 static void arm_smmu_tlb_inv_context_s2(void *cookie)
271 struct arm_smmu_domain *smmu_domain = cookie;
272 struct arm_smmu_device *smmu = smmu_domain->smmu;
276 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
277 arm_smmu_tlb_sync_global(smmu);
280 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
281 size_t granule, void *cookie, int reg)
283 struct arm_smmu_domain *smmu_domain = cookie;
284 struct arm_smmu_device *smmu = smmu_domain->smmu;
285 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
286 int idx = cfg->cbndx;
288 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
291 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
292 iova = (iova >> 12) << 12;
295 arm_smmu_cb_write(smmu, idx, reg, iova);
297 } while (size -= granule);
300 iova |= (u64)cfg->asid << 48;
302 arm_smmu_cb_writeq(smmu, idx, reg, iova);
303 iova += granule >> 12;
304 } while (size -= granule);
308 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
309 size_t granule, void *cookie, int reg)
311 struct arm_smmu_domain *smmu_domain = cookie;
312 struct arm_smmu_device *smmu = smmu_domain->smmu;
313 int idx = smmu_domain->cfg.cbndx;
315 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
320 if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
321 arm_smmu_cb_writeq(smmu, idx, reg, iova);
323 arm_smmu_cb_write(smmu, idx, reg, iova);
324 iova += granule >> 12;
325 } while (size -= granule);
328 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
329 size_t granule, void *cookie)
331 arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
332 ARM_SMMU_CB_S1_TLBIVA);
333 arm_smmu_tlb_sync_context(cookie);
336 static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
337 size_t granule, void *cookie)
339 arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
340 ARM_SMMU_CB_S1_TLBIVAL);
341 arm_smmu_tlb_sync_context(cookie);
344 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
345 unsigned long iova, size_t granule,
348 arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
349 ARM_SMMU_CB_S1_TLBIVAL);
352 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
353 size_t granule, void *cookie)
355 arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
356 ARM_SMMU_CB_S2_TLBIIPAS2);
357 arm_smmu_tlb_sync_context(cookie);
360 static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
361 size_t granule, void *cookie)
363 arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
364 ARM_SMMU_CB_S2_TLBIIPAS2L);
365 arm_smmu_tlb_sync_context(cookie);
368 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
369 unsigned long iova, size_t granule,
372 arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
373 ARM_SMMU_CB_S2_TLBIIPAS2L);
376 static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
377 size_t granule, void *cookie)
379 arm_smmu_tlb_inv_context_s2(cookie);
382 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
383 * almost negligible, but the benefit of getting the first one in as far ahead
384 * of the sync as possible is significant, hence we don't just make this a
385 * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
388 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
389 unsigned long iova, size_t granule,
392 struct arm_smmu_domain *smmu_domain = cookie;
393 struct arm_smmu_device *smmu = smmu_domain->smmu;
395 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
398 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
401 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
402 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
403 .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1,
404 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1,
405 .tlb_add_page = arm_smmu_tlb_add_page_s1,
408 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
409 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
410 .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2,
411 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2,
412 .tlb_add_page = arm_smmu_tlb_add_page_s2,
415 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
416 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
417 .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1,
418 .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1,
419 .tlb_add_page = arm_smmu_tlb_add_page_s2_v1,
422 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
424 u32 fsr, fsynr, cbfrsynra;
426 struct iommu_domain *domain = dev;
427 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
428 struct arm_smmu_device *smmu = smmu_domain->smmu;
429 int idx = smmu_domain->cfg.cbndx;
431 fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
432 if (!(fsr & ARM_SMMU_FSR_FAULT))
435 fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
436 iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
437 cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
439 dev_err_ratelimited(smmu->dev,
440 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
441 fsr, iova, fsynr, cbfrsynra, idx);
443 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
447 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
449 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
450 struct arm_smmu_device *smmu = dev;
451 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
452 DEFAULT_RATELIMIT_BURST);
454 gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
455 gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
456 gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
457 gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
462 if (__ratelimit(&rs)) {
463 if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
464 (gfsr & ARM_SMMU_sGFSR_USF))
466 "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
470 "Unexpected global fault, this could be serious\n");
472 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
473 gfsr, gfsynr0, gfsynr1, gfsynr2);
476 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
480 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
481 struct io_pgtable_cfg *pgtbl_cfg)
483 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
484 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
485 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
491 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
492 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
494 cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
495 cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
496 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
497 cb->tcr[1] |= ARM_SMMU_TCR2_AS;
499 cb->tcr[0] |= ARM_SMMU_TCR_EAE;
502 cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
507 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
508 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
511 cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
513 cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
516 if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
517 cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
519 cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
522 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
525 /* MAIRs (stage-1 only) */
527 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
528 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
529 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
531 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
532 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
537 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
541 struct arm_smmu_cb *cb = &smmu->cbs[idx];
542 struct arm_smmu_cfg *cfg = cb->cfg;
544 /* Unassigned context banks only need disabling */
546 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
550 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
553 if (smmu->version > ARM_SMMU_V1) {
554 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
555 reg = ARM_SMMU_CBA2R_VA64;
558 /* 16-bit VMIDs live in CBA2R */
559 if (smmu->features & ARM_SMMU_FEAT_VMID16)
560 reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
562 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
566 reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
567 if (smmu->version < ARM_SMMU_V2)
568 reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
571 * Use the weakest shareability/memory types, so they are
572 * overridden by the ttbcr/pte.
575 reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
576 ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
577 FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
578 ARM_SMMU_CBAR_S1_MEMATTR_WB);
579 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
580 /* 8-bit VMIDs live in CBAR */
581 reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
583 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
587 * We must write this before the TTBRs, since it determines the
588 * access behaviour of some fields (in particular, ASID[15:8]).
590 if (stage1 && smmu->version > ARM_SMMU_V1)
591 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
592 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
595 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
596 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
597 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
598 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
600 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
602 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
606 /* MAIRs (stage-1 only) */
608 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
609 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
613 reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
614 ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
616 reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
617 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
618 reg |= ARM_SMMU_SCTLR_E;
620 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
623 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
624 struct arm_smmu_device *smmu,
625 struct device *dev, unsigned int start)
627 if (smmu->impl && smmu->impl->alloc_context_bank)
628 return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
630 return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
633 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
634 struct arm_smmu_device *smmu,
637 int irq, start, ret = 0;
638 unsigned long ias, oas;
639 struct io_pgtable_ops *pgtbl_ops;
640 struct io_pgtable_cfg pgtbl_cfg;
641 enum io_pgtable_fmt fmt;
642 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
643 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
644 irqreturn_t (*context_fault)(int irq, void *dev);
646 mutex_lock(&smmu_domain->init_mutex);
647 if (smmu_domain->smmu)
650 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
651 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
652 smmu_domain->smmu = smmu;
657 * Mapping the requested stage onto what we support is surprisingly
658 * complicated, mainly because the spec allows S1+S2 SMMUs without
659 * support for nested translation. That means we end up with the
662 * Requested Supported Actual
672 * Note that you can't actually request stage-2 mappings.
674 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
675 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
676 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
677 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
680 * Choosing a suitable context format is even more fiddly. Until we
681 * grow some way for the caller to express a preference, and/or move
682 * the decision into the io-pgtable code where it arguably belongs,
683 * just aim for the closest thing to the rest of the system, and hope
684 * that the hardware isn't esoteric enough that we can't assume AArch64
685 * support to be a superset of AArch32 support...
687 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
688 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
689 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
690 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
691 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
692 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
693 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
694 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
695 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
696 ARM_SMMU_FEAT_FMT_AARCH64_16K |
697 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
698 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
700 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
705 switch (smmu_domain->stage) {
706 case ARM_SMMU_DOMAIN_S1:
707 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
708 start = smmu->num_s2_context_banks;
710 oas = smmu->ipa_size;
711 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
712 fmt = ARM_64_LPAE_S1;
713 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
714 fmt = ARM_32_LPAE_S1;
715 ias = min(ias, 32UL);
716 oas = min(oas, 40UL);
719 ias = min(ias, 32UL);
720 oas = min(oas, 32UL);
722 smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
724 case ARM_SMMU_DOMAIN_NESTED:
726 * We will likely want to change this if/when KVM gets
729 case ARM_SMMU_DOMAIN_S2:
730 cfg->cbar = CBAR_TYPE_S2_TRANS;
732 ias = smmu->ipa_size;
734 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
735 fmt = ARM_64_LPAE_S2;
737 fmt = ARM_32_LPAE_S2;
738 ias = min(ias, 40UL);
739 oas = min(oas, 40UL);
741 if (smmu->version == ARM_SMMU_V2)
742 smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
744 smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
751 ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
756 smmu_domain->smmu = smmu;
759 if (smmu->version < ARM_SMMU_V2) {
760 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
761 cfg->irptndx %= smmu->num_context_irqs;
763 cfg->irptndx = cfg->cbndx;
766 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
767 cfg->vmid = cfg->cbndx + 1;
769 cfg->asid = cfg->cbndx;
771 pgtbl_cfg = (struct io_pgtable_cfg) {
772 .pgsize_bitmap = smmu->pgsize_bitmap,
775 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
776 .tlb = smmu_domain->flush_ops,
777 .iommu_dev = smmu->dev,
780 if (smmu->impl && smmu->impl->init_context) {
781 ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
786 if (smmu_domain->non_strict)
787 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
789 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
795 /* Update the domain's page sizes to reflect the page table format */
796 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
798 if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
799 domain->geometry.aperture_start = ~0UL << ias;
800 domain->geometry.aperture_end = ~0UL;
802 domain->geometry.aperture_end = (1UL << ias) - 1;
805 domain->geometry.force_aperture = true;
807 /* Initialise the context bank with our page table cfg */
808 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
809 arm_smmu_write_context_bank(smmu, cfg->cbndx);
812 * Request context fault interrupt. Do this last to avoid the
813 * handler seeing a half-initialised domain state.
815 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
817 if (smmu->impl && smmu->impl->context_fault)
818 context_fault = smmu->impl->context_fault;
820 context_fault = arm_smmu_context_fault;
822 ret = devm_request_irq(smmu->dev, irq, context_fault,
823 IRQF_SHARED, "arm-smmu-context-fault", domain);
825 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
827 cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
830 mutex_unlock(&smmu_domain->init_mutex);
832 /* Publish page table ops for map/unmap */
833 smmu_domain->pgtbl_ops = pgtbl_ops;
837 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
838 smmu_domain->smmu = NULL;
840 mutex_unlock(&smmu_domain->init_mutex);
844 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
846 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
847 struct arm_smmu_device *smmu = smmu_domain->smmu;
848 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
851 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
854 ret = arm_smmu_rpm_get(smmu);
859 * Disable the context bank and free the page tables before freeing
862 smmu->cbs[cfg->cbndx].cfg = NULL;
863 arm_smmu_write_context_bank(smmu, cfg->cbndx);
865 if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
866 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
867 devm_free_irq(smmu->dev, irq, domain);
870 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
871 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
873 arm_smmu_rpm_put(smmu);
876 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
878 struct arm_smmu_domain *smmu_domain;
880 if (type != IOMMU_DOMAIN_UNMANAGED &&
881 type != IOMMU_DOMAIN_DMA &&
882 type != IOMMU_DOMAIN_IDENTITY)
885 * Allocate the domain and initialise some of its data structures.
886 * We can't really do anything meaningful until we've added a
889 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
893 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
894 iommu_get_dma_cookie(&smmu_domain->domain))) {
899 mutex_init(&smmu_domain->init_mutex);
900 spin_lock_init(&smmu_domain->cb_lock);
902 return &smmu_domain->domain;
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
918 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
920 struct arm_smmu_smr *smr = smmu->smrs + idx;
921 u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
922 FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
924 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
925 reg |= ARM_SMMU_SMR_VALID;
926 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
929 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
931 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
934 if (smmu->impl && smmu->impl->write_s2cr) {
935 smmu->impl->write_s2cr(smmu, idx);
939 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
940 FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
941 FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
943 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
944 smmu->smrs[idx].valid)
945 reg |= ARM_SMMU_S2CR_EXIDVALID;
946 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
949 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
951 arm_smmu_write_s2cr(smmu, idx);
953 arm_smmu_write_smr(smmu, idx);
957 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
958 * should be called after sCR0 is written.
960 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
968 * If we've had to accommodate firmware memory regions, we may
969 * have live SMRs by now; tread carefully...
971 * Somewhat perversely, not having a free SMR for this test implies we
972 * can get away without it anyway, as we'll only be able to 'allocate'
973 * these SMRs for the ID/mask values we're already trusting to be OK.
975 for (i = 0; i < smmu->num_mapping_groups; i++)
976 if (!smmu->smrs[i].valid)
981 * SMR.ID bits may not be preserved if the corresponding MASK
982 * bits are set, so check each one separately. We can reject
983 * masters later if they try to claim IDs outside these masks.
985 smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
986 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
987 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
988 smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
990 smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
991 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
992 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
993 smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
996 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
998 struct arm_smmu_smr *smrs = smmu->smrs;
999 int i, free_idx = -ENOSPC;
1001 /* Stream indexing is blissfully easy */
1005 /* Validating SMRs is... less so */
1006 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1007 if (!smrs[i].valid) {
1009 * Note the first free entry we come across, which
1010 * we'll claim in the end if nothing else matches.
1017 * If the new entry is _entirely_ matched by an existing entry,
1018 * then reuse that, with the guarantee that there also cannot
1019 * be any subsequent conflicting entries. In normal use we'd
1020 * expect simply identical entries for this case, but there's
1021 * no harm in accommodating the generalisation.
1023 if ((mask & smrs[i].mask) == mask &&
1024 !((id ^ smrs[i].id) & ~smrs[i].mask))
1027 * If the new entry has any other overlap with an existing one,
1028 * though, then there always exists at least one stream ID
1029 * which would cause a conflict, and we can't allow that risk.
1031 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1038 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1040 if (--smmu->s2crs[idx].count)
1043 smmu->s2crs[idx] = s2cr_init_val;
1045 smmu->smrs[idx].valid = false;
1050 static int arm_smmu_master_alloc_smes(struct device *dev)
1052 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1053 struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1054 struct arm_smmu_device *smmu = cfg->smmu;
1055 struct arm_smmu_smr *smrs = smmu->smrs;
1058 mutex_lock(&smmu->stream_map_mutex);
1059 /* Figure out a viable stream map entry allocation */
1060 for_each_cfg_sme(cfg, fwspec, i, idx) {
1061 u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1062 u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1064 if (idx != INVALID_SMENDX) {
1069 ret = arm_smmu_find_sme(smmu, sid, mask);
1074 if (smrs && smmu->s2crs[idx].count == 0) {
1076 smrs[idx].mask = mask;
1077 smrs[idx].valid = true;
1079 smmu->s2crs[idx].count++;
1080 cfg->smendx[i] = (s16)idx;
1083 /* It worked! Now, poke the actual hardware */
1084 for_each_cfg_sme(cfg, fwspec, i, idx)
1085 arm_smmu_write_sme(smmu, idx);
1087 mutex_unlock(&smmu->stream_map_mutex);
1092 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1093 cfg->smendx[i] = INVALID_SMENDX;
1095 mutex_unlock(&smmu->stream_map_mutex);
1099 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1100 struct iommu_fwspec *fwspec)
1102 struct arm_smmu_device *smmu = cfg->smmu;
1105 mutex_lock(&smmu->stream_map_mutex);
1106 for_each_cfg_sme(cfg, fwspec, i, idx) {
1107 if (arm_smmu_free_sme(smmu, idx))
1108 arm_smmu_write_sme(smmu, idx);
1109 cfg->smendx[i] = INVALID_SMENDX;
1111 mutex_unlock(&smmu->stream_map_mutex);
1114 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1115 struct arm_smmu_master_cfg *cfg,
1116 struct iommu_fwspec *fwspec)
1118 struct arm_smmu_device *smmu = smmu_domain->smmu;
1119 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1120 u8 cbndx = smmu_domain->cfg.cbndx;
1121 enum arm_smmu_s2cr_type type;
1124 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1125 type = S2CR_TYPE_BYPASS;
1127 type = S2CR_TYPE_TRANS;
1129 for_each_cfg_sme(cfg, fwspec, i, idx) {
1130 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1133 s2cr[idx].type = type;
1134 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1135 s2cr[idx].cbndx = cbndx;
1136 arm_smmu_write_s2cr(smmu, idx);
/*
 * Attach @dev to an IOMMU domain: take a runtime-PM reference, finalise the
 * domain's context bank on this SMMU, verify the domain and the device live
 * on the same SMMU instance, then program the device's stream mappings.
 * NOTE(review): listing is elided here — error paths/returns not visible.
 */
1141 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1143 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1144 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1145 struct arm_smmu_master_cfg *cfg;
1146 struct arm_smmu_device *smmu;
/* Device was never probed against this driver's ops: refuse the attach. */
1149 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1150 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1155 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1156 * domains between of_xlate() and probe_device() - we have no way to cope
1157 * with that, so until ARM gets converted to rely on groups and default
1158 * domains, just say no (but more politely than by dereferencing NULL).
1159 * This should be at least a WARN_ON once that's sorted.
1161 cfg = dev_iommu_priv_get(dev);
1167 ret = arm_smmu_rpm_get(smmu);
1171 /* Ensure that the domain is finalised */
1172 ret = arm_smmu_init_domain_context(domain, smmu, dev);
1177 * Sanity check the domain. We don't support domains across
1180 if (smmu_domain->smmu != smmu) {
1182 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1183 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1188 /* Looks ok, so add the device to the domain */
1189 ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1192 * Setup an autosuspend delay to avoid bouncing runpm state.
1193 * Otherwise, if a driver for a suspended consumer device
1194 * unmaps buffers, it will runpm resume/suspend for each one.
1196 * For example, when used by a GPU device, when an application
1197 * or game exits, it can trigger unmapping 100s or 1000s of
1198 * buffers. With a runpm cycle for each buffer, that adds up
1199 * to 5-10sec worth of reprogramming the context bank, while
1200 * the system appears to be locked up to the user.
/* 20ms autosuspend delay batches back-to-back map/unmap PM cycles. */
1202 pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1203 pm_runtime_use_autosuspend(smmu->dev);
1206 arm_smmu_rpm_put(smmu);
/*
 * Map @size bytes at @iova -> @paddr with @prot permissions, delegating to
 * the io-pgtable ops for this domain. The SMMU is runtime-PM-resumed across
 * the page-table update because it may touch hardware (e.g. TLB maintenance).
 */
1210 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1211 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
1213 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1214 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1220 arm_smmu_rpm_get(smmu);
1221 ret = ops->map(ops, iova, paddr, size, prot, gfp);
1222 arm_smmu_rpm_put(smmu);
/*
 * Unmap @size bytes at @iova via the io-pgtable ops; TLB invalidations are
 * accumulated in @gather and flushed later by iotlb_sync. Returns the number
 * of bytes actually unmapped (via the elided return, per iommu_ops contract).
 */
1227 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1228 size_t size, struct iommu_iotlb_gather *gather)
1230 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1231 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1237 arm_smmu_rpm_get(smmu);
1238 ret = ops->unmap(ops, iova, size, gather);
1239 arm_smmu_rpm_put(smmu);
/*
 * Invalidate the whole TLB for the domain. flush_ops is only set once the
 * domain has been initialised on an SMMU, hence the guard.
 */
1244 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1246 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1247 struct arm_smmu_device *smmu = smmu_domain->smmu;
1249 if (smmu_domain->flush_ops) {
1250 arm_smmu_rpm_get(smmu);
1251 smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1252 arm_smmu_rpm_put(smmu);
/*
 * Wait for queued TLB invalidations to complete. SMMUv2 (and stage-1
 * domains) have a per-context sync register; otherwise fall back to the
 * global sync register.
 */
1256 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1257 struct iommu_iotlb_gather *gather)
1259 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1260 struct arm_smmu_device *smmu = smmu_domain->smmu;
1265 arm_smmu_rpm_get(smmu);
1266 if (smmu->version == ARM_SMMU_V2 ||
1267 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1268 arm_smmu_tlb_sync_context(smmu_domain);
1270 arm_smmu_tlb_sync_global(smmu);
1271 arm_smmu_rpm_put(smmu);
/*
 * Translate @iova to a physical address using the hardware ATS1PR
 * (address-translation) operation on the domain's context bank, polling
 * ATSR until the walk completes. On timeout or translation fault the
 * software page-table walk is used as a fallback/diagnostic.
 */
1274 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1277 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1278 struct arm_smmu_device *smmu = smmu_domain->smmu;
1279 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1280 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1281 struct device *dev = smmu->dev;
1285 unsigned long va, flags;
1286 int ret, idx = cfg->cbndx;
1287 phys_addr_t addr = 0;
1289 ret = arm_smmu_rpm_get(smmu);
/* cb_lock serialises use of the shared ATS1PR/ATSR/PAR registers. */
1293 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
/* ATS1PR takes a page-aligned VA; the low 12 bits are re-added below. */
1294 va = iova & ~0xfffUL;
1295 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1296 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1298 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1300 reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1301 if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1303 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1305 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1307 arm_smmu_rpm_put(smmu);
1308 return ops->iova_to_phys(ops, iova);
1311 phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1312 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1313 if (phys & ARM_SMMU_CB_PAR_F) {
1314 dev_err(dev, "translation fault!\n");
1315 dev_err(dev, "PAR = 0x%llx\n", phys);
/* Combine the PA page frame (PAR bits 39:12) with the page offset. */
1319 addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1321 arm_smmu_rpm_put(smmu);
/*
 * iommu_ops->iova_to_phys: identity domains translate 1:1 (elided branch);
 * stage-1 domains on hardware with ATS support use the hardware walk,
 * everything else uses the software page-table walk.
 */
1326 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1329 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1330 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1332 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1338 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1339 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1340 return arm_smmu_iova_to_phys_hard(domain, iova);
1342 return ops->iova_to_phys(ops, iova);
/* Report driver capabilities to the IOMMU core (switch body elided). */
1345 static bool arm_smmu_capable(enum iommu_cap cap)
1348 case IOMMU_CAP_CACHE_COHERENCY:
1350 * Return true here as the SMMU can always send out coherent
1354 case IOMMU_CAP_NOEXEC:
/*
 * Look up the arm_smmu_device instance bound to @fwnode via the driver
 * core, or NULL if no such probed device exists.
 */
1362 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1364 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1367 return dev ? dev_get_drvdata(dev) : NULL;
/*
 * iommu_ops->probe_device: resolve which SMMU owns @dev (legacy
 * "mmu-masters" binding or generic fwspec), validate every stream ID and
 * SMR mask against the hardware limits, allocate the per-master cfg with a
 * flexible smendx[] array, claim stream-mapping entries, and link the
 * consumer to the SMMU for runtime PM.
 */
1370 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1372 struct arm_smmu_device *smmu = NULL;
1373 struct arm_smmu_master_cfg *cfg;
1374 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1377 if (using_legacy_binding) {
1378 ret = arm_smmu_register_legacy_master(dev, &smmu);
1381 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1382 * will allocate/initialise a new one. Thus we need to update fwspec for
1385 fwspec = dev_iommu_fwspec_get(dev);
1388 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1389 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1391 return ERR_PTR(-ENODEV);
/* Reject IDs/masks wider than this SMMU's SMR fields can encode. */
1395 for (i = 0; i < fwspec->num_ids; i++) {
1396 u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1397 u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1399 if (sid & ~smmu->streamid_mask) {
1400 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1401 sid, smmu->streamid_mask);
1404 if (mask & ~smmu->smr_mask_mask) {
1405 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1406 mask, smmu->smr_mask_mask);
/* Size the allocation by the trailing smendx[] flexible array. */
1412 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1418 dev_iommu_priv_set(dev, cfg);
1420 cfg->smendx[i] = INVALID_SMENDX;
1422 ret = arm_smmu_rpm_get(smmu);
1426 ret = arm_smmu_master_alloc_smes(dev);
1427 arm_smmu_rpm_put(smmu);
/* Keep the SMMU powered/probed as long as this consumer exists. */
1432 device_link_add(dev, smmu->dev,
1433 DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1435 return &smmu->iommu;
1440 iommu_fwspec_free(dev);
1441 return ERR_PTR(ret);
/*
 * iommu_ops->release_device: undo probe_device — free the device's stream
 * mapping entries (with the SMMU powered), then drop the per-master cfg
 * and fwspec.
 */
1444 static void arm_smmu_release_device(struct device *dev)
1446 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1447 struct arm_smmu_master_cfg *cfg;
1448 struct arm_smmu_device *smmu;
1451 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1454 cfg = dev_iommu_priv_get(dev);
1457 ret = arm_smmu_rpm_get(smmu);
1461 arm_smmu_master_free_smes(cfg, fwspec);
1463 arm_smmu_rpm_put(smmu);
1465 dev_iommu_priv_set(dev, NULL);
1467 iommu_fwspec_free(dev);
/*
 * iommu_ops->device_group: if any of the device's stream-map entries is
 * already bound to a group, reuse it (devices sharing an S2CR must share a
 * group; conflicting groups are an error). Otherwise allocate a bus-
 * appropriate group and cache it on each S2CR for later lookups.
 */
1470 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1472 struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1473 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1474 struct arm_smmu_device *smmu = cfg->smmu;
1475 struct iommu_group *group = NULL;
1478 for_each_cfg_sme(cfg, fwspec, i, idx) {
1479 if (group && smmu->s2crs[idx].group &&
1480 group != smmu->s2crs[idx].group)
1481 return ERR_PTR(-EINVAL);
1483 group = smmu->s2crs[idx].group;
1487 return iommu_group_ref_get(group);
1489 if (dev_is_pci(dev))
1490 group = pci_device_group(dev);
1491 else if (dev_is_fsl_mc(dev))
1492 group = fsl_mc_device_group(dev);
1494 group = generic_device_group(dev);
1496 /* Remember group for faster lookups */
1498 for_each_cfg_sme(cfg, fwspec, i, idx)
1499 smmu->s2crs[idx].group = group;
/*
 * Read a domain attribute: NESTING for unmanaged domains, non-strict
 * flush-queue mode for DMA domains (remaining cases elided).
 */
1504 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1505 enum iommu_attr attr, void *data)
1507 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1509 switch(domain->type) {
1510 case IOMMU_DOMAIN_UNMANAGED:
1512 case DOMAIN_ATTR_NESTING:
1513 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1519 case IOMMU_DOMAIN_DMA:
1521 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1522 *(int *)data = smmu_domain->non_strict;
/*
 * Write a domain attribute under init_mutex. NESTING can only be changed
 * before the domain is bound to an SMMU (smmu_domain->smmu still NULL);
 * DMA domains accept the non-strict flush-queue toggle.
 */
1533 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1534 enum iommu_attr attr, void *data)
1537 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1539 mutex_lock(&smmu_domain->init_mutex);
1541 switch(domain->type) {
1542 case IOMMU_DOMAIN_UNMANAGED:
1544 case DOMAIN_ATTR_NESTING:
1545 if (smmu_domain->smmu) {
1551 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1553 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1559 case IOMMU_DOMAIN_DMA:
1561 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1562 smmu_domain->non_strict = *(int *)data;
1572 mutex_unlock(&smmu_domain->init_mutex);
/*
 * DT translation hook: pack the stream ID (cell 0) and optional SMR mask
 * (cell 1, or the "stream-match-mask" property) into one fwspec ID.
 */
1576 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1580 if (args->args_count > 0)
1581 fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1583 if (args->args_count > 1)
1584 fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1585 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1586 fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1588 return iommu_fwspec_add_ids(dev, &fwid, 1);
/*
 * Report reserved IOVA regions for @dev: a software MSI window (so the DMA
 * layer can map MSI doorbells) plus any generic regions from the DMA-IOMMU
 * layer.
 *
 * Fix: "list_add_tail(®ion->list, ...)" was encoding corruption — the
 * "&reg" of "&region" had been mangled into the '®' sign; restore the
 * address-of operator so the region is actually linked onto @head.
 */
1591 static void arm_smmu_get_resv_regions(struct device *dev,
1592 struct list_head *head)
1594 struct iommu_resv_region *region;
1595 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1597 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1598 prot, IOMMU_RESV_SW_MSI);
1602 list_add_tail(&region->list, head);
1604 iommu_dma_get_resv_regions(dev, head);
/*
 * Let an implementation quirk layer (smmu->impl) force a default domain
 * type for @dev; the fall-through default (elided) leaves the choice to
 * the IOMMU core.
 */
1607 static int arm_smmu_def_domain_type(struct device *dev)
1609 struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1610 const struct arm_smmu_impl *impl = cfg->smmu->impl;
1612 if (impl && impl->def_domain_type)
1613 return impl->def_domain_type(dev);
/*
 * IOMMU core callback table for this driver. pgsize_bitmap starts as -1UL
 * and is narrowed in arm_smmu_device_cfg_probe() once the hardware's
 * supported page sizes are known.
 */
1618 static struct iommu_ops arm_smmu_ops = {
1619 .capable = arm_smmu_capable,
1620 .domain_alloc = arm_smmu_domain_alloc,
1621 .domain_free = arm_smmu_domain_free,
1622 .attach_dev = arm_smmu_attach_dev,
1623 .map = arm_smmu_map,
1624 .unmap = arm_smmu_unmap,
1625 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
1626 .iotlb_sync = arm_smmu_iotlb_sync,
1627 .iova_to_phys = arm_smmu_iova_to_phys,
1628 .probe_device = arm_smmu_probe_device,
1629 .release_device = arm_smmu_release_device,
1630 .device_group = arm_smmu_device_group,
1631 .domain_get_attr = arm_smmu_domain_get_attr,
1632 .domain_set_attr = arm_smmu_domain_set_attr,
1633 .of_xlate = arm_smmu_of_xlate,
1634 .get_resv_regions = arm_smmu_get_resv_regions,
1635 .put_resv_regions = generic_iommu_put_resv_regions,
1636 .def_domain_type = arm_smmu_def_domain_type,
1637 .pgsize_bitmap = -1UL, /* Restricted during device attach */
/*
 * Bring the SMMU to a known state: clear fault status, reset all stream
 * mapping entries and context banks, invalidate TLBs, then build and
 * commit the global sCR0 configuration. Called at probe and on runtime
 * resume.
 */
1640 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1645 /* clear global FSR */
1646 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1647 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1650 * Reset stream mapping groups: Initial values mark all SMRn as
1651 * invalid and all S2CRn as bypass unless overridden.
1653 for (i = 0; i < smmu->num_mapping_groups; ++i)
1654 arm_smmu_write_sme(smmu, i);
1656 /* Make sure all context banks are disabled and clear CB_FSR */
1657 for (i = 0; i < smmu->num_context_banks; ++i) {
1658 arm_smmu_write_context_bank(smmu, i);
1659 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1662 /* Invalidate the TLB, just in case */
/* QCOM_DUMMY_VAL avoids a buggy hypervisor trap on XZR stores (see top). */
1663 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1664 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1666 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1668 /* Enable fault reporting */
1669 reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1670 ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1672 /* Disable TLB broadcasting. */
1673 reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1675 /* Enable client access, handling unmatched streams as appropriate */
1676 reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1678 reg |= ARM_SMMU_sCR0_USFCFG;
1680 reg &= ~ARM_SMMU_sCR0_USFCFG;
1682 /* Disable forced broadcasting */
1683 reg &= ~ARM_SMMU_sCR0_FB;
1685 /* Don't upgrade barriers */
1686 reg &= ~(ARM_SMMU_sCR0_BSU);
1688 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1689 reg |= ARM_SMMU_sCR0_VMID16EN;
1691 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1692 reg |= ARM_SMMU_sCR0_EXIDENABLE;
/* Give implementation quirks a chance to adjust state before enable. */
1694 if (smmu->impl && smmu->impl->reset)
1695 smmu->impl->reset(smmu);
1697 /* Push the button */
1698 arm_smmu_tlb_sync_global(smmu);
1699 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
/* Decode an ID-register size field into an address width in bits
 * (body elided in this listing). */
1702 static int arm_smmu_id_size_to_bits(int size)
/*
 * Probe the SMMU's ID registers (ID0/ID1/ID2) and populate smmu->features,
 * stream-mapping tables, context-bank counts, address sizes and supported
 * page sizes. Also validates the mapped MMIO size against the hardware's
 * reported address space and applies impl->cfg_probe quirks.
 */
1721 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1725 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1728 dev_notice(smmu->dev, "probing hardware configuration...\n");
1729 dev_notice(smmu->dev, "SMMUv%d with:\n",
1730 smmu->version == ARM_SMMU_V2 ? 2 : 1);
/* --- ID0: translation stages, stream matching, coherency --- */
1733 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1735 /* Restrict available stages based on module parameter */
1736 if (force_stage == 1)
1737 id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1738 else if (force_stage == 2)
1739 id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1741 if (id & ARM_SMMU_ID0_S1TS) {
1742 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1743 dev_notice(smmu->dev, "\tstage 1 translation\n");
1746 if (id & ARM_SMMU_ID0_S2TS) {
1747 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1748 dev_notice(smmu->dev, "\tstage 2 translation\n");
1751 if (id & ARM_SMMU_ID0_NTS) {
1752 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1753 dev_notice(smmu->dev, "\tnested translation\n");
1756 if (!(smmu->features &
1757 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1758 dev_err(smmu->dev, "\tno translation support!\n");
1762 if ((id & ARM_SMMU_ID0_S1TS) &&
1763 ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1764 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1765 dev_notice(smmu->dev, "\taddress translation ops\n");
1769 * In order for DMA API calls to work properly, we must defer to what
1770 * the FW says about coherency, regardless of what the hardware claims.
1771 * Fortunately, this also opens up a workaround for systems where the
1772 * ID register value has ended up configured incorrectly.
1774 cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1775 if (cttw_fw || cttw_reg)
1776 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1777 cttw_fw ? "" : "non-");
1778 if (cttw_fw != cttw_reg)
1779 dev_notice(smmu->dev,
1780 "\t(IDR0.CTTW overridden by FW configuration)\n");
1782 /* Max. number of entries we have for stream matching/indexing */
1783 if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1784 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1787 size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1789 smmu->streamid_mask = size - 1;
1790 if (id & ARM_SMMU_ID0_SMS) {
1791 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1792 size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1795 "stream-matching supported, but no SMRs present!\n");
1799 /* Zero-initialised to mark as invalid */
1800 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1805 dev_notice(smmu->dev,
1806 "\tstream matching with %u register groups", size);
1808 /* s2cr->type == 0 means translation, so initialise explicitly */
1809 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1813 for (i = 0; i < size; i++)
1814 smmu->s2crs[i] = s2cr_init_val;
1816 smmu->num_mapping_groups = size;
1817 mutex_init(&smmu->stream_map_mutex);
1818 spin_lock_init(&smmu->global_sync_lock);
1820 if (smmu->version < ARM_SMMU_V2 ||
1821 !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1822 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1823 if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1824 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
/* --- ID1: page size, address-space size, context bank counts --- */
1828 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1829 smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1831 /* Check for size mismatch of SMMU address space from mapped region */
1832 size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1833 if (smmu->numpage != 2 * size << smmu->pgshift)
1835 "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1836 2 * size << smmu->pgshift, smmu->numpage);
1837 /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1838 smmu->numpage = size;
1840 smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1841 smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1842 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1843 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1846 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1847 smmu->num_context_banks, smmu->num_s2_context_banks);
1848 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1849 sizeof(*smmu->cbs), GFP_KERNEL);
/* --- ID2: input/output address sizes, VMID16, page-table formats --- */
1854 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1855 size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1856 smmu->ipa_size = size;
1858 /* The output mask is also applied for bypass */
1859 size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1860 smmu->pa_size = size;
1862 if (id & ARM_SMMU_ID2_VMID16)
1863 smmu->features |= ARM_SMMU_FEAT_VMID16;
1866 * What the page table walker can address actually depends on which
1867 * descriptor format is in use, but since a) we don't know that yet,
1868 * and b) it can vary per context bank, this will have to do...
1870 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1872 "failed to set DMA mask for table walker\n");
1874 if (smmu->version < ARM_SMMU_V2) {
1875 smmu->va_size = smmu->ipa_size;
1876 if (smmu->version == ARM_SMMU_V1_64K)
1877 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1879 size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1880 smmu->va_size = arm_smmu_id_size_to_bits(size);
1881 if (id & ARM_SMMU_ID2_PTFS_4K)
1882 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1883 if (id & ARM_SMMU_ID2_PTFS_16K)
1884 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1885 if (id & ARM_SMMU_ID2_PTFS_64K)
1886 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1889 if (smmu->impl && smmu->impl->cfg_probe) {
1890 ret = smmu->impl->cfg_probe(smmu);
1895 /* Now we've corralled the various formats, what'll it do? */
1896 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1897 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1898 if (smmu->features &
1899 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1900 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1901 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1902 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1903 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1904 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
/* First SMMU sets the global ops bitmap; later ones union into it. */
1906 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1907 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1909 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1910 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1911 smmu->pgsize_bitmap);
1914 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1915 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1916 smmu->va_size, smmu->ipa_size);
1918 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1919 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1920 smmu->ipa_size, smmu->pa_size);
/* Per-compatible match data: architecture version + implementation model. */
1925 struct arm_smmu_match_data {
1926 enum arm_smmu_arch_version version;
1927 enum arm_smmu_implementation model;
/* Helper to declare a static match-data instance for the OF table below. */
1930 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1931 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1933 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1934 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1935 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1936 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1937 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1938 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
/* Device-tree compatible strings handled by this driver. */
1940 static const struct of_device_id arm_smmu_of_match[] = {
1941 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1942 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1943 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1944 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1945 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1946 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1947 { .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
1948 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1951 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
/*
 * Map an ACPI IORT SMMU model ID to this driver's version/model pair
 * (default/error case elided in this listing).
 */
1954 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1959 case ACPI_IORT_SMMU_V1:
1960 case ACPI_IORT_SMMU_CORELINK_MMU400:
1961 smmu->version = ARM_SMMU_V1;
1962 smmu->model = GENERIC_SMMU;
1964 case ACPI_IORT_SMMU_CORELINK_MMU401:
1965 smmu->version = ARM_SMMU_V1_64K;
1966 smmu->model = GENERIC_SMMU;
1968 case ACPI_IORT_SMMU_V2:
1969 smmu->version = ARM_SMMU_V2;
1970 smmu->model = GENERIC_SMMU;
1972 case ACPI_IORT_SMMU_CORELINK_MMU500:
1973 smmu->version = ARM_SMMU_V2;
1974 smmu->model = ARM_MMU500;
1976 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1977 smmu->version = ARM_SMMU_V2;
1978 smmu->model = CAVIUM_SMMUV2;
/*
 * ACPI probe path: read the IORT node from platform data, translate its
 * model to version/model, and pick up the coherent-walk flag.
 */
1987 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1988 struct arm_smmu_device *smmu)
1990 struct device *dev = smmu->dev;
1991 struct acpi_iort_node *node =
1992 *(struct acpi_iort_node **)dev_get_platdata(dev);
1993 struct acpi_iort_smmu *iort_smmu;
1996 /* Retrieve SMMU1/2 specific data */
1997 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1999 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2003 /* Ignore the configuration access interrupt */
2004 smmu->num_global_irqs = 1;
2006 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2007 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
/* Stub used when ACPI support is compiled out (body elided). */
2012 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2013 struct arm_smmu_device *smmu)
/*
 * DT probe path: read #global-interrupts, resolve match data into
 * version/model, arbitrate between the deprecated "mmu-masters" legacy
 * binding and the generic binding (the two cannot be mixed system-wide),
 * and record DMA coherency.
 */
2019 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2020 struct arm_smmu_device *smmu)
2022 const struct arm_smmu_match_data *data;
2023 struct device *dev = &pdev->dev;
2024 bool legacy_binding;
2026 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2027 &smmu->num_global_irqs)) {
2028 dev_err(dev, "missing #global-interrupts property\n");
2032 data = of_device_get_match_data(dev);
2033 smmu->version = data->version;
2034 smmu->model = data->model;
2036 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2037 if (legacy_binding && !using_generic_binding) {
2038 if (!using_legacy_binding) {
2039 pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2040 IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2042 using_legacy_binding = true;
2043 } else if (!legacy_binding && !using_legacy_binding) {
2044 using_generic_binding = true;
2046 dev_err(dev, "not probing due to mismatched DT properties\n");
2050 if (of_dma_is_coherent(dev->of_node))
2051 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
/*
 * Install (or, with ops == NULL, remove) this driver's iommu_ops on every
 * bus type that may carry SMMU masters: platform, AMBA, PCI, fsl-mc.
 * On failure, unwind the ops already installed via the goto chain.
 */
2056 static int arm_smmu_bus_init(struct iommu_ops *ops)
2060 /* Oh, for a proper bus abstraction */
2061 if (!iommu_present(&platform_bus_type)) {
2062 err = bus_set_iommu(&platform_bus_type, ops);
2066 #ifdef CONFIG_ARM_AMBA
2067 if (!iommu_present(&amba_bustype)) {
2068 err = bus_set_iommu(&amba_bustype, ops);
2070 goto err_reset_platform_ops;
2074 if (!iommu_present(&pci_bus_type)) {
2075 err = bus_set_iommu(&pci_bus_type, ops);
2077 goto err_reset_amba_ops;
2080 #ifdef CONFIG_FSL_MC_BUS
2081 if (!iommu_present(&fsl_mc_bus_type)) {
2082 err = bus_set_iommu(&fsl_mc_bus_type, ops);
2084 goto err_reset_pci_ops;
2089 err_reset_pci_ops: __maybe_unused;
2091 bus_set_iommu(&pci_bus_type, NULL);
2093 err_reset_amba_ops: __maybe_unused;
2094 #ifdef CONFIG_ARM_AMBA
2095 bus_set_iommu(&amba_bustype, NULL);
2097 err_reset_platform_ops: __maybe_unused;
2098 bus_set_iommu(&platform_bus_type, NULL);
/*
 * Platform-driver probe: allocate the device struct, run the DT or ACPI
 * sub-probe, map the MMIO region, apply implementation quirks, collect
 * global and context IRQs, enable clocks, probe the hardware configuration,
 * request global-fault IRQs, register with the IOMMU core, reset the
 * hardware, and finally install bus ops (generic binding only).
 */
2102 static int arm_smmu_device_probe(struct platform_device *pdev)
2104 struct resource *res;
2105 resource_size_t ioaddr;
2106 struct arm_smmu_device *smmu;
2107 struct device *dev = &pdev->dev;
2108 int num_irqs, i, err;
2109 irqreturn_t (*global_fault)(int irq, void *dev);
2111 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2113 dev_err(dev, "failed to allocate arm_smmu_device\n");
2119 err = arm_smmu_device_dt_probe(pdev, smmu);
2121 err = arm_smmu_device_acpi_probe(pdev, smmu);
2126 smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
2127 if (IS_ERR(smmu->base))
2128 return PTR_ERR(smmu->base);
2129 ioaddr = res->start;
2131 * The resource size should effectively match the value of SMMU_TOP;
2132 * stash that temporarily until we know PAGESIZE to validate it with.
2134 smmu->numpage = resource_size(res);
2136 smmu = arm_smmu_impl_init(smmu);
2138 return PTR_ERR(smmu);
/* Count IRQ resources; those beyond num_global_irqs are context IRQs. */
2141 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2143 if (num_irqs > smmu->num_global_irqs)
2144 smmu->num_context_irqs++;
2147 if (!smmu->num_context_irqs) {
2148 dev_err(dev, "found %d interrupts but expected at least %d\n",
2149 num_irqs, smmu->num_global_irqs + 1);
2153 smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2156 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2160 for (i = 0; i < num_irqs; ++i) {
2161 int irq = platform_get_irq(pdev, i);
2165 smmu->irqs[i] = irq;
2168 err = devm_clk_bulk_get_all(dev, &smmu->clks);
2170 dev_err(dev, "failed to get clocks %d\n", err);
2173 smmu->num_clks = err;
2175 err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2179 err = arm_smmu_device_cfg_probe(smmu);
2183 if (smmu->version == ARM_SMMU_V2) {
2184 if (smmu->num_context_banks > smmu->num_context_irqs) {
2186 "found only %d context irq(s) but %d required\n",
2187 smmu->num_context_irqs, smmu->num_context_banks);
2191 /* Ignore superfluous interrupts */
2192 smmu->num_context_irqs = smmu->num_context_banks;
/* Implementations may override the global fault handler. */
2195 if (smmu->impl && smmu->impl->global_fault)
2196 global_fault = smmu->impl->global_fault;
2198 global_fault = arm_smmu_global_fault;
2200 for (i = 0; i < smmu->num_global_irqs; ++i) {
2201 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2204 "arm-smmu global fault",
2207 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2213 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2214 "smmu.%pa", &ioaddr);
2216 dev_err(dev, "Failed to register iommu in sysfs\n");
2220 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2221 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2223 err = iommu_device_register(&smmu->iommu);
2225 dev_err(dev, "Failed to register iommu\n");
2229 platform_set_drvdata(pdev, smmu);
2230 arm_smmu_device_reset(smmu);
2231 arm_smmu_test_smr_masks(smmu);
2234 * We want to avoid touching dev->power.lock in fastpaths unless
2235 * it's really going to do something useful - pm_runtime_enabled()
2236 * can serve as an ideal proxy for that decision. So, conditionally
2237 * enable pm_runtime.
2239 if (dev->pm_domain) {
2240 pm_runtime_set_active(dev);
2241 pm_runtime_enable(dev);
2245 * For ACPI and generic DT bindings, an SMMU will be probed before
2246 * any device which might need it, so we want the bus ops in place
2247 * ready to handle default domain setup as soon as any SMMU exists.
2249 if (!using_legacy_binding)
2250 return arm_smmu_bus_init(&arm_smmu_ops);
/*
 * Platform-driver remove: detach bus ops, unregister from the IOMMU core,
 * set CLIENTPD in sCR0 to disable translation, then suspend and release
 * clocks. Warns (via the elided dev_notice path) if context banks are
 * still allocated.
 */
2255 static int arm_smmu_device_remove(struct platform_device *pdev)
2257 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2262 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2263 dev_notice(&pdev->dev, "disabling translation\n");
2265 arm_smmu_bus_init(NULL);
2266 iommu_device_unregister(&smmu->iommu);
2267 iommu_device_sysfs_remove(&smmu->iommu);
2269 arm_smmu_rpm_get(smmu);
2270 /* Turn the thing off */
2271 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2272 arm_smmu_rpm_put(smmu);
2274 if (pm_runtime_enabled(smmu->dev))
2275 pm_runtime_force_suspend(smmu->dev);
2277 clk_bulk_disable(smmu->num_clks, smmu->clks);
2279 clk_bulk_unprepare(smmu->num_clks, smmu->clks);
/* Shutdown reuses the full remove path to quiesce the hardware. */
2283 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2285 arm_smmu_device_remove(pdev);
/* Runtime resume: re-enable clocks and re-program the SMMU registers,
 * which are lost/stale across a power-down. */
2288 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2290 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2293 ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2297 arm_smmu_device_reset(smmu);
/* Runtime suspend: just gate the clocks. */
2302 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2304 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2306 clk_bulk_disable(smmu->num_clks, smmu->clks);
/* System resume: skip if runtime-suspended, else do a runtime resume. */
2311 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2313 if (pm_runtime_suspended(dev))
2316 return arm_smmu_runtime_resume(dev);
/* System suspend: skip if already runtime-suspended. */
2319 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2321 if (pm_runtime_suspended(dev))
2324 return arm_smmu_runtime_suspend(dev);
/* System-sleep and runtime PM callbacks wired to the helpers above. */
2327 static const struct dev_pm_ops arm_smmu_pm_ops = {
2328 SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2329 SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2330 arm_smmu_runtime_resume, NULL)
/* Platform driver definition; unbinding via sysfs is suppressed because an
 * SMMU cannot safely disappear from under its masters. */
2333 static struct platform_driver arm_smmu_driver = {
2336 .of_match_table = arm_smmu_of_match,
2337 .pm = &arm_smmu_pm_ops,
2338 .suppress_bind_attrs = true,
2340 .probe = arm_smmu_device_probe,
2341 .remove = arm_smmu_device_remove,
2342 .shutdown = arm_smmu_device_shutdown,
/* Standard module registration and metadata. */
2344 module_platform_driver(arm_smmu_driver);
2346 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2347 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2348 MODULE_ALIAS("platform:arm-smmu");
2349 MODULE_LICENSE("GPL v2");