2 * IOMMU API for ARM architected SMMU implementations.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * Copyright (C) 2013 ARM Limited
19 * Author: Will Deacon <will.deacon@arm.com>
21 * This driver currently supports:
22 * - SMMUv1 and v2 implementations
23 * - Stream-matching and stream-indexing
24 * - v7/v8 long-descriptor format
25 * - Non-secure access to the SMMU
26 * - Context fault reporting
27 * - Extended Stream ID (16 bit)
30 #define pr_fmt(fmt) "arm-smmu: " fmt
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
54 #include <linux/amba/bus.h>
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
60 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
61 * global register space are still, in fact, using a hypervisor to mediate it
62 * by trapping and emulating register accesses. Sadly, some deployed versions
63 * of said trapping code have bugs wherein they go horribly wrong for stores
64 * using r31 (i.e. XZR/WZR) as the source register.
66 #define QCOM_DUMMY_VAL -1
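/*
 * Writing this all-ones dummy value rather than a literal zero to write-only
 * "trigger" registers (the TLB sync and TLBIALL* writes below) stops the
 * compiler from picking XZR/WZR as the source register; the written value
 * itself is ignored by those registers.
 */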
68 #define ARM_MMU500_ACTLR_CPRE (1 << 1)
70 #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
71 #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
73 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
74 #define TLB_SPIN_COUNT 10
76 /* Maximum number of context banks per SMMU */
77 #define ARM_SMMU_MAX_CBS 128
79 /* SMMU global address space */
80 #define ARM_SMMU_GR0(smmu) ((smmu)->base)
81 #define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	(ARM_SMMU_GR0(smmu) +						\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))
/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif
104 /* Translation context bank */
105 #define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
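/*
 * The register map is organised in translation-unit-sized pages: GR0 is page
 * 0 and GR1 page 1 of the global space, while context bank n occupies its own
 * page starting at cb_base (the upper half of the mapped region, set up in
 * arm_smmu_device_probe()). pgshift is 12 or 16 according to ID1.PAGESIZE.
 */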
107 #define MSI_IOVA_BASE 0x8000000
108 #define MSI_IOVA_LENGTH 0x100000
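/*
 * Fixed IOVA window reserved for mapping MSI doorbells; it is reported to the
 * IOMMU core as a software-managed MSI region by arm_smmu_get_resv_regions()
 * below.
 */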
110 static int force_stage;
111 module_param(force_stage, int, S_IRUGO);
112 MODULE_PARM_DESC(force_stage,
113 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
114 static bool disable_bypass;
115 module_param(disable_bypass, bool, S_IRUGO);
116 MODULE_PARM_DESC(disable_bypass,
117 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
119 enum arm_smmu_arch_version {
125 enum arm_smmu_implementation {
132 /* Until ACPICA headers cover IORT rev. C */
#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
#define ACPI_IORT_SMMU_CORELINK_MMU401	0x4
#endif

#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
#define ACPI_IORT_SMMU_CAVIUM_THUNDERX	0x5
#endif
140 struct arm_smmu_s2cr {
141 struct iommu_group *group;
143 enum arm_smmu_s2cr_type type;
144 enum arm_smmu_s2cr_privcfg privcfg;
148 #define s2cr_init_val (struct arm_smmu_s2cr){ \
149 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
152 struct arm_smmu_smr {
162 struct arm_smmu_cfg *cfg;
165 struct arm_smmu_master_cfg {
166 struct arm_smmu_device *smmu;
169 #define INVALID_SMENDX -1
170 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
171 #define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
172 #define fwspec_smendx(fw, i) \
173 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
174 #define for_each_cfg_sme(fw, i, idx) \
175 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
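/*
 * Typical usage (cf. arm_smmu_master_alloc_smes() below) walks a master's
 * stream IDs together with their stream map entry indexes, where an index of
 * INVALID_SMENDX means fwspec->ids[i] has not been given an SME yet:
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		u16 sid = fwspec->ids[i];
 *		...
 *	}
 */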
177 struct arm_smmu_device {
181 void __iomem *cb_base;
182 unsigned long pgshift;
184 #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
185 #define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
186 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
187 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
188 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
189 #define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
190 #define ARM_SMMU_FEAT_VMID16 (1 << 6)
191 #define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
192 #define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
193 #define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
194 #define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
195 #define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
196 #define ARM_SMMU_FEAT_EXIDS (1 << 12)
199 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
201 enum arm_smmu_arch_version version;
202 enum arm_smmu_implementation model;
204 u32 num_context_banks;
205 u32 num_s2_context_banks;
206 DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
207 struct arm_smmu_cb *cbs;
210 u32 num_mapping_groups;
213 struct arm_smmu_smr *smrs;
214 struct arm_smmu_s2cr *s2crs;
215 struct mutex stream_map_mutex;
217 unsigned long va_size;
218 unsigned long ipa_size;
219 unsigned long pa_size;
220 unsigned long pgsize_bitmap;
223 u32 num_context_irqs;
226 u32 cavium_id_base; /* Specific to Cavium */
228 spinlock_t global_sync_lock;
230 /* IOMMU core code handle */
231 struct iommu_device iommu;
234 enum arm_smmu_context_fmt {
235 ARM_SMMU_CTX_FMT_NONE,
236 ARM_SMMU_CTX_FMT_AARCH64,
237 ARM_SMMU_CTX_FMT_AARCH32_L,
238 ARM_SMMU_CTX_FMT_AARCH32_S,
241 struct arm_smmu_cfg {
249 enum arm_smmu_context_fmt fmt;
251 #define INVALID_IRPTNDX 0xff
253 enum arm_smmu_domain_stage {
254 ARM_SMMU_DOMAIN_S1 = 0,
256 ARM_SMMU_DOMAIN_NESTED,
257 ARM_SMMU_DOMAIN_BYPASS,
260 struct arm_smmu_domain {
261 struct arm_smmu_device *smmu;
262 struct io_pgtable_ops *pgtbl_ops;
263 struct arm_smmu_cfg cfg;
264 enum arm_smmu_domain_stage stage;
265 struct mutex init_mutex; /* Protects smmu pointer */
266 spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
267 struct iommu_domain domain;
270 struct arm_smmu_option_prop {
275 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
277 static bool using_legacy_binding, using_generic_binding;
279 static struct arm_smmu_option_prop arm_smmu_options[] = {
280 { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
284 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
286 return container_of(dom, struct arm_smmu_domain, domain);
289 static void parse_driver_options(struct arm_smmu_device *smmu)
294 if (of_property_read_bool(smmu->dev->of_node,
295 arm_smmu_options[i].prop)) {
296 smmu->options |= arm_smmu_options[i].opt;
297 dev_notice(smmu->dev, "option %s\n",
298 arm_smmu_options[i].prop);
300 } while (arm_smmu_options[++i].opt);
303 static struct device_node *dev_get_dev_node(struct device *dev)
305 if (dev_is_pci(dev)) {
306 struct pci_bus *bus = to_pci_dev(dev)->bus;
308 while (!pci_is_root_bus(bus))
310 return of_node_get(bus->bridge->parent->of_node);
313 return of_node_get(dev->of_node);
316 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
318 *((__be32 *)data) = cpu_to_be32(alias);
319 return 0; /* Continue walking */
322 static int __find_legacy_master_phandle(struct device *dev, void *data)
324 struct of_phandle_iterator *it = *(void **)data;
325 struct device_node *np = it->node;
328 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
329 "#stream-id-cells", 0)
330 if (it->node == np) {
331 *(void **)data = dev;
335 return err == -ENOENT ? 0 : err;
338 static struct platform_driver arm_smmu_driver;
339 static struct iommu_ops arm_smmu_ops;
341 static int arm_smmu_register_legacy_master(struct device *dev,
342 struct arm_smmu_device **smmu)
344 struct device *smmu_dev;
345 struct device_node *np;
346 struct of_phandle_iterator it;
352 np = dev_get_dev_node(dev);
353 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
359 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
360 __find_legacy_master_phandle);
368 if (dev_is_pci(dev)) {
369 /* "mmu-masters" assumes Stream ID == Requester ID */
370 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
376 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
381 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
385 *smmu = dev_get_drvdata(smmu_dev);
386 of_phandle_iterator_args(&it, sids, it.cur_count);
387 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
392 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
397 idx = find_next_zero_bit(map, end, start);
400 } while (test_and_set_bit(idx, map));
405 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
				void __iomem *sync, void __iomem *status)
{
	unsigned int spin_cnt, delay;

	writel_relaxed(QCOM_DUMMY_VAL, sync);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
429 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
431 void __iomem *base = ARM_SMMU_GR0(smmu);
434 spin_lock_irqsave(&smmu->global_sync_lock, flags);
435 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
436 base + ARM_SMMU_GR0_sTLBGSTATUS);
437 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
440 static void arm_smmu_tlb_sync_context(void *cookie)
442 struct arm_smmu_domain *smmu_domain = cookie;
443 struct arm_smmu_device *smmu = smmu_domain->smmu;
444 void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
447 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
448 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
449 base + ARM_SMMU_CB_TLBSTATUS);
450 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
453 static void arm_smmu_tlb_sync_vmid(void *cookie)
455 struct arm_smmu_domain *smmu_domain = cookie;
457 arm_smmu_tlb_sync_global(smmu_domain->smmu);
460 static void arm_smmu_tlb_inv_context_s1(void *cookie)
462 struct arm_smmu_domain *smmu_domain = cookie;
463 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
464 void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
466 writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
467 arm_smmu_tlb_sync_context(cookie);
470 static void arm_smmu_tlb_inv_context_s2(void *cookie)
472 struct arm_smmu_domain *smmu_domain = cookie;
473 struct arm_smmu_device *smmu = smmu_domain->smmu;
474 void __iomem *base = ARM_SMMU_GR0(smmu);
476 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
477 arm_smmu_tlb_sync_global(smmu);
480 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
481 size_t granule, bool leaf, void *cookie)
483 struct arm_smmu_domain *smmu_domain = cookie;
484 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
485 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
486 void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
488 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
492 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
494 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
498 writel_relaxed(iova, reg);
500 } while (size -= granule);
503 iova |= (u64)cfg->asid << 48;
505 writeq_relaxed(iova, reg);
506 iova += granule >> 12;
507 } while (size -= granule);
510 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
511 ARM_SMMU_CB_S2_TLBIIPAS2;
514 smmu_write_atomic_lq(iova, reg);
515 iova += granule >> 12;
516 } while (size -= granule);
521 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
522 * almost negligible, but the benefit of getting the first one in as far ahead
523 * of the sync as possible is significant, hence we don't just make this a
524 * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
526 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
527 size_t granule, bool leaf, void *cookie)
529 struct arm_smmu_domain *smmu_domain = cookie;
530 void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
532 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
535 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
538 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
539 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
540 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
541 .tlb_sync = arm_smmu_tlb_sync_context,
544 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
545 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
546 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
547 .tlb_sync = arm_smmu_tlb_sync_context,
550 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
551 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
552 .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
553 .tlb_sync = arm_smmu_tlb_sync_vmid,
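/*
 * Three flavours of TLB maintenance: stage 1 invalidates by ASID and syncs on
 * the context bank; stage 2 on SMMUv2 invalidates by IPA and likewise syncs
 * on the context bank; stage 2 on SMMUv1 (e.g. MMU-401) can only invalidate
 * by VMID through GR0 and therefore has to sync globally as well.
 */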
556 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
560 struct iommu_domain *domain = dev;
561 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
562 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
563 struct arm_smmu_device *smmu = smmu_domain->smmu;
564 void __iomem *cb_base;
566 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
567 fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
569 if (!(fsr & FSR_FAULT))
572 fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
573 iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
575 dev_err_ratelimited(smmu->dev,
576 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
577 fsr, iova, fsynr, cfg->cbndx);
579 writel(fsr, cb_base + ARM_SMMU_CB_FSR);
583 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
585 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
586 struct arm_smmu_device *smmu = dev;
587 void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
589 gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
590 gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
591 gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
592 gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
597 dev_err_ratelimited(smmu->dev,
598 "Unexpected global fault, this could be serious\n");
599 dev_err_ratelimited(smmu->dev,
600 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
601 gfsr, gfsynr0, gfsynr1, gfsynr2);
603 writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
607 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
608 struct io_pgtable_cfg *pgtbl_cfg)
610 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
611 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
612 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
618 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
619 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
621 cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
622 cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
623 cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
624 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
625 cb->tcr[1] |= TTBCR2_AS;
628 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
633 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
634 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
635 cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
637 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
638 cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
639 cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
640 cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
643 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
646 /* MAIRs (stage-1 only) */
648 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
649 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
650 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
652 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
653 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
658 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
662 struct arm_smmu_cb *cb = &smmu->cbs[idx];
663 struct arm_smmu_cfg *cfg = cb->cfg;
664 void __iomem *cb_base, *gr1_base;
666 cb_base = ARM_SMMU_CB(smmu, idx);
668 /* Unassigned context banks only need disabling */
670 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
674 gr1_base = ARM_SMMU_GR1(smmu);
675 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
678 if (smmu->version > ARM_SMMU_V1) {
679 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
680 reg = CBA2R_RW64_64BIT;
682 reg = CBA2R_RW64_32BIT;
683 /* 16-bit VMIDs live in CBA2R */
684 if (smmu->features & ARM_SMMU_FEAT_VMID16)
685 reg |= cfg->vmid << CBA2R_VMID_SHIFT;
687 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
692 if (smmu->version < ARM_SMMU_V2)
693 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
696 * Use the weakest shareability/memory types, so they are
697 * overridden by the ttbcr/pte.
700 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
701 (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
702 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
703 /* 8-bit VMIDs live in CBAR */
704 reg |= cfg->vmid << CBAR_VMID_SHIFT;
706 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
710 * We must write this before the TTBRs, since it determines the
711 * access behaviour of some fields (in particular, ASID[15:8]).
713 if (stage1 && smmu->version > ARM_SMMU_V1)
714 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
715 writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
718 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
719 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
720 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
721 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
723 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
725 writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
728 /* MAIRs (stage-1 only) */
730 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
731 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
735 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
737 reg |= SCTLR_S1_ASIDPNE;
738 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
741 writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
744 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
745 struct arm_smmu_device *smmu)
747 int irq, start, ret = 0;
748 unsigned long ias, oas;
749 struct io_pgtable_ops *pgtbl_ops;
750 struct io_pgtable_cfg pgtbl_cfg;
751 enum io_pgtable_fmt fmt;
752 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
753 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
754 const struct iommu_gather_ops *tlb_ops;
756 mutex_lock(&smmu_domain->init_mutex);
757 if (smmu_domain->smmu)
760 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
761 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
762 smmu_domain->smmu = smmu;
767 * Mapping the requested stage onto what we support is surprisingly
768 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N               S1+S2           N
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
784 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
785 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
786 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
787 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
790 * Choosing a suitable context format is even more fiddly. Until we
791 * grow some way for the caller to express a preference, and/or move
792 * the decision into the io-pgtable code where it arguably belongs,
793 * just aim for the closest thing to the rest of the system, and hope
794 * that the hardware isn't esoteric enough that we can't assume AArch64
795 * support to be a superset of AArch32 support...
797 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
798 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
799 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
800 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
801 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
802 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
803 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
804 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
805 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
806 ARM_SMMU_FEAT_FMT_AARCH64_16K |
807 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
808 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
810 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
815 switch (smmu_domain->stage) {
816 case ARM_SMMU_DOMAIN_S1:
817 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
818 start = smmu->num_s2_context_banks;
820 oas = smmu->ipa_size;
821 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
822 fmt = ARM_64_LPAE_S1;
823 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
824 fmt = ARM_32_LPAE_S1;
825 ias = min(ias, 32UL);
826 oas = min(oas, 40UL);
829 ias = min(ias, 32UL);
830 oas = min(oas, 32UL);
832 tlb_ops = &arm_smmu_s1_tlb_ops;
834 case ARM_SMMU_DOMAIN_NESTED:
836 * We will likely want to change this if/when KVM gets
839 case ARM_SMMU_DOMAIN_S2:
840 cfg->cbar = CBAR_TYPE_S2_TRANS;
842 ias = smmu->ipa_size;
844 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
845 fmt = ARM_64_LPAE_S2;
847 fmt = ARM_32_LPAE_S2;
848 ias = min(ias, 40UL);
849 oas = min(oas, 40UL);
851 if (smmu->version == ARM_SMMU_V2)
852 tlb_ops = &arm_smmu_s2_tlb_ops_v2;
854 tlb_ops = &arm_smmu_s2_tlb_ops_v1;
860 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
861 smmu->num_context_banks);
866 if (smmu->version < ARM_SMMU_V2) {
867 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
868 cfg->irptndx %= smmu->num_context_irqs;
870 cfg->irptndx = cfg->cbndx;
873 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
874 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
876 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
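	/*
	 * cavium_id_base is non-zero only on Cavium CN88xx parts, where it
	 * offsets ASIDs/VMIDs so they remain unique across all SMMUs in the
	 * system (erratum 27704, see arm_smmu_device_cfg_probe()).
	 */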
878 pgtbl_cfg = (struct io_pgtable_cfg) {
879 .pgsize_bitmap = smmu->pgsize_bitmap,
883 .iommu_dev = smmu->dev,
886 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
887 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
889 smmu_domain->smmu = smmu;
890 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
896 /* Update the domain's page sizes to reflect the page table format */
897 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
898 domain->geometry.aperture_end = (1UL << ias) - 1;
899 domain->geometry.force_aperture = true;
901 /* Initialise the context bank with our page table cfg */
902 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
903 arm_smmu_write_context_bank(smmu, cfg->cbndx);
906 * Request context fault interrupt. Do this last to avoid the
907 * handler seeing a half-initialised domain state.
909 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
910 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
911 IRQF_SHARED, "arm-smmu-context-fault", domain);
913 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
915 cfg->irptndx = INVALID_IRPTNDX;
918 mutex_unlock(&smmu_domain->init_mutex);
920 /* Publish page table ops for map/unmap */
921 smmu_domain->pgtbl_ops = pgtbl_ops;
925 smmu_domain->smmu = NULL;
927 mutex_unlock(&smmu_domain->init_mutex);
931 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
933 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
934 struct arm_smmu_device *smmu = smmu_domain->smmu;
935 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
938 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
942 * Disable the context bank and free the page tables before freeing
945 smmu->cbs[cfg->cbndx].cfg = NULL;
946 arm_smmu_write_context_bank(smmu, cfg->cbndx);
948 if (cfg->irptndx != INVALID_IRPTNDX) {
949 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
950 devm_free_irq(smmu->dev, irq, domain);
953 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
954 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
957 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
959 struct arm_smmu_domain *smmu_domain;
961 if (type != IOMMU_DOMAIN_UNMANAGED &&
962 type != IOMMU_DOMAIN_DMA &&
963 type != IOMMU_DOMAIN_IDENTITY)
966 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
970 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
974 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
975 iommu_get_dma_cookie(&smmu_domain->domain))) {
980 mutex_init(&smmu_domain->init_mutex);
981 spin_lock_init(&smmu_domain->cb_lock);
983 return &smmu_domain->domain;
986 static void arm_smmu_domain_free(struct iommu_domain *domain)
988 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
991 * Free the domain resources. We assume that all devices have
992 * already been detached.
994 iommu_put_dma_cookie(domain);
995 arm_smmu_destroy_domain_context(domain);
999 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1001 struct arm_smmu_smr *smr = smmu->smrs + idx;
1002 u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
1004 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1006 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1009 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1011 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1012 u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1013 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1014 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1016 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1017 smmu->smrs[idx].valid)
1018 reg |= S2CR_EXIDVALID;
1019 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1022 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1024 arm_smmu_write_s2cr(smmu, idx);
1026 arm_smmu_write_smr(smmu, idx);
1030 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1031 * should be called after sCR0 is written.
1033 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1035 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1042 * SMR.ID bits may not be preserved if the corresponding MASK
1043 * bits are set, so check each one separately. We can reject
1044 * masters later if they try to claim IDs outside these masks.
1046 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1047 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1048 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1049 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1051 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1052 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1053 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1054 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
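	/*
	 * For example, an SMMU implementing 10 stream ID bits would leave
	 * streamid_mask == 0x3ff after the first probe above; the second then
	 * reports how many of those bits the MASK field actually implements.
	 */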
1057 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1059 struct arm_smmu_smr *smrs = smmu->smrs;
1060 int i, free_idx = -ENOSPC;
1062 /* Stream indexing is blissfully easy */
1066 /* Validating SMRs is... less so */
1067 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1068 if (!smrs[i].valid) {
1070 * Note the first free entry we come across, which
1071 * we'll claim in the end if nothing else matches.
1078 * If the new entry is _entirely_ matched by an existing entry,
1079 * then reuse that, with the guarantee that there also cannot
1080 * be any subsequent conflicting entries. In normal use we'd
1081 * expect simply identical entries for this case, but there's
1082 * no harm in accommodating the generalisation.
1084 if ((mask & smrs[i].mask) == mask &&
1085 !((id ^ smrs[i].id) & ~smrs[i].mask))
1088 * If the new entry has any other overlap with an existing one,
1089 * though, then there always exists at least one stream ID
1090 * which would cause a conflict, and we can't allow that risk.
1092 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
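/*
 * Worked example of the matching rules in arm_smmu_find_sme(): a new entry
 * {id 0x410, mask 0x0f} is entirely covered by an existing {id 0x400,
 * mask 0xff}, so the existing index is reused; a new {id 0x404, mask 0xff}
 * only partially overlaps an existing {id 0x400, mask 0x0f} and is rejected.
 */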
1099 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1101 if (--smmu->s2crs[idx].count)
1104 smmu->s2crs[idx] = s2cr_init_val;
1106 smmu->smrs[idx].valid = false;
1111 static int arm_smmu_master_alloc_smes(struct device *dev)
1113 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1114 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1115 struct arm_smmu_device *smmu = cfg->smmu;
1116 struct arm_smmu_smr *smrs = smmu->smrs;
1117 struct iommu_group *group;
1120 mutex_lock(&smmu->stream_map_mutex);
1121 /* Figure out a viable stream map entry allocation */
1122 for_each_cfg_sme(fwspec, i, idx) {
1123 u16 sid = fwspec->ids[i];
1124 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1126 if (idx != INVALID_SMENDX) {
1131 ret = arm_smmu_find_sme(smmu, sid, mask);
1136 if (smrs && smmu->s2crs[idx].count == 0) {
1138 smrs[idx].mask = mask;
1139 smrs[idx].valid = true;
1141 smmu->s2crs[idx].count++;
1142 cfg->smendx[i] = (s16)idx;
1145 group = iommu_group_get_for_dev(dev);
1147 group = ERR_PTR(-ENOMEM);
1148 if (IS_ERR(group)) {
1149 ret = PTR_ERR(group);
1152 iommu_group_put(group);
1154 /* It worked! Now, poke the actual hardware */
1155 for_each_cfg_sme(fwspec, i, idx) {
1156 arm_smmu_write_sme(smmu, idx);
1157 smmu->s2crs[idx].group = group;
1160 mutex_unlock(&smmu->stream_map_mutex);
1165 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1166 cfg->smendx[i] = INVALID_SMENDX;
1168 mutex_unlock(&smmu->stream_map_mutex);
1172 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1174 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1175 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1178 mutex_lock(&smmu->stream_map_mutex);
1179 for_each_cfg_sme(fwspec, i, idx) {
1180 if (arm_smmu_free_sme(smmu, idx))
1181 arm_smmu_write_sme(smmu, idx);
1182 cfg->smendx[i] = INVALID_SMENDX;
1184 mutex_unlock(&smmu->stream_map_mutex);
1187 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1188 struct iommu_fwspec *fwspec)
1190 struct arm_smmu_device *smmu = smmu_domain->smmu;
1191 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1192 u8 cbndx = smmu_domain->cfg.cbndx;
1193 enum arm_smmu_s2cr_type type;
1196 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1197 type = S2CR_TYPE_BYPASS;
1199 type = S2CR_TYPE_TRANS;
1201 for_each_cfg_sme(fwspec, i, idx) {
1202 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1205 s2cr[idx].type = type;
1206 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1207 s2cr[idx].cbndx = cbndx;
1208 arm_smmu_write_s2cr(smmu, idx);
1213 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1216 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1217 struct arm_smmu_device *smmu;
1218 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1220 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1221 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1226 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1227 * domains between of_xlate() and add_device() - we have no way to cope
1228 * with that, so until ARM gets converted to rely on groups and default
1229 * domains, just say no (but more politely than by dereferencing NULL).
1230 * This should be at least a WARN_ON once that's sorted.
1232 if (!fwspec->iommu_priv)
1235 smmu = fwspec_smmu(fwspec);
1236 /* Ensure that the domain is finalised */
1237 ret = arm_smmu_init_domain_context(domain, smmu);
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
1245 if (smmu_domain->smmu != smmu) {
1247 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1248 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1252 /* Looks ok, so add the device to the domain */
1253 return arm_smmu_domain_add_master(smmu_domain, fwspec);
1256 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1257 phys_addr_t paddr, size_t size, int prot)
1259 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1264 return ops->map(ops, iova, paddr, size, prot);
1267 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1270 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1275 return ops->unmap(ops, iova, size);
1278 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1281 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1282 struct arm_smmu_device *smmu = smmu_domain->smmu;
1283 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1285 struct device *dev = smmu->dev;
1286 void __iomem *cb_base;
1289 unsigned long va, flags;
1291 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1293 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1294 /* ATS1 registers can only be written atomically */
1295 va = iova & ~0xfffUL;
1296 if (smmu->version == ARM_SMMU_V2)
1297 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1298 else /* Register is only 32-bit in v1 */
1299 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1301 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1302 !(tmp & ATSR_ACTIVE), 5, 50)) {
1303 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1305 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1307 return ops->iova_to_phys(ops, iova);
1310 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1311 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1321 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1324 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1325 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1327 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1333 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1334 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1335 return arm_smmu_iova_to_phys_hard(domain, iova);
1337 return ops->iova_to_phys(ops, iova);
1340 static bool arm_smmu_capable(enum iommu_cap cap)
1343 case IOMMU_CAP_CACHE_COHERENCY:
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
1349 case IOMMU_CAP_NOEXEC:
1356 static int arm_smmu_match_node(struct device *dev, void *data)
1358 return dev->fwnode == data;
1362 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1364 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1365 fwnode, arm_smmu_match_node);
1367 return dev ? dev_get_drvdata(dev) : NULL;
1370 static int arm_smmu_add_device(struct device *dev)
1372 struct arm_smmu_device *smmu;
1373 struct arm_smmu_master_cfg *cfg;
1374 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1377 if (using_legacy_binding) {
1378 ret = arm_smmu_register_legacy_master(dev, &smmu);
		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
1385 fwspec = dev->iommu_fwspec;
1388 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1389 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1395 for (i = 0; i < fwspec->num_ids; i++) {
1396 u16 sid = fwspec->ids[i];
1397 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1399 if (sid & ~smmu->streamid_mask) {
1400 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1401 sid, smmu->streamid_mask);
1404 if (mask & ~smmu->smr_mask_mask) {
1405 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1406 mask, smmu->smr_mask_mask);
1412 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1418 fwspec->iommu_priv = cfg;
1420 cfg->smendx[i] = INVALID_SMENDX;
1422 ret = arm_smmu_master_alloc_smes(dev);
1426 iommu_device_link(&smmu->iommu, dev);
1433 iommu_fwspec_free(dev);
1437 static void arm_smmu_remove_device(struct device *dev)
1439 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1440 struct arm_smmu_master_cfg *cfg;
1441 struct arm_smmu_device *smmu;
1444 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1447 cfg = fwspec->iommu_priv;
1450 iommu_device_unlink(&smmu->iommu, dev);
1451 arm_smmu_master_free_smes(fwspec);
1452 iommu_group_remove_device(dev);
1453 kfree(fwspec->iommu_priv);
1454 iommu_fwspec_free(dev);
1457 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1459 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1460 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1461 struct iommu_group *group = NULL;
1464 for_each_cfg_sme(fwspec, i, idx) {
1465 if (group && smmu->s2crs[idx].group &&
1466 group != smmu->s2crs[idx].group)
1467 return ERR_PTR(-EINVAL);
1469 group = smmu->s2crs[idx].group;
1473 return iommu_group_ref_get(group);
1475 if (dev_is_pci(dev))
1476 group = pci_device_group(dev);
1478 group = generic_device_group(dev);
1483 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1484 enum iommu_attr attr, void *data)
1486 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1488 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1492 case DOMAIN_ATTR_NESTING:
1493 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1500 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1501 enum iommu_attr attr, void *data)
1504 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1506 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1509 mutex_lock(&smmu_domain->init_mutex);
1512 case DOMAIN_ATTR_NESTING:
1513 if (smmu_domain->smmu) {
1519 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1521 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1529 mutex_unlock(&smmu_domain->init_mutex);
1533 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1537 if (args->args_count > 0)
1538 fwid |= (u16)args->args[0];
1540 if (args->args_count > 1)
1541 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1542 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1543 fwid |= (u16)mask << SMR_MASK_SHIFT;
1545 return iommu_fwspec_add_ids(dev, &fwid, 1);
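/*
 * arm_smmu_of_xlate() handles the generic "iommus" binding; a hypothetical
 * master node would reference the SMMU along the lines of:
 *
 *	iommus = <&smmu 0x400>;
 *
 * where the first cell is the stream ID and an optional second cell (or a
 * "stream-match-mask" property on the SMMU node) supplies the SMR mask,
 * packed into the upper 16 bits of the fwspec ID.
 */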
1548 static void arm_smmu_get_resv_regions(struct device *dev,
1549 struct list_head *head)
1551 struct iommu_resv_region *region;
1552 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1554 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1555 prot, IOMMU_RESV_SW_MSI);
	list_add_tail(&region->list, head);
1561 iommu_dma_get_resv_regions(dev, head);
1564 static void arm_smmu_put_resv_regions(struct device *dev,
1565 struct list_head *head)
1567 struct iommu_resv_region *entry, *next;
1569 list_for_each_entry_safe(entry, next, head, list)
1573 static struct iommu_ops arm_smmu_ops = {
1574 .capable = arm_smmu_capable,
1575 .domain_alloc = arm_smmu_domain_alloc,
1576 .domain_free = arm_smmu_domain_free,
1577 .attach_dev = arm_smmu_attach_dev,
1578 .map = arm_smmu_map,
1579 .unmap = arm_smmu_unmap,
1580 .map_sg = default_iommu_map_sg,
1581 .iova_to_phys = arm_smmu_iova_to_phys,
1582 .add_device = arm_smmu_add_device,
1583 .remove_device = arm_smmu_remove_device,
1584 .device_group = arm_smmu_device_group,
1585 .domain_get_attr = arm_smmu_domain_get_attr,
1586 .domain_set_attr = arm_smmu_domain_set_attr,
1587 .of_xlate = arm_smmu_of_xlate,
1588 .get_resv_regions = arm_smmu_get_resv_regions,
1589 .put_resv_regions = arm_smmu_put_resv_regions,
1590 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1593 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1595 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1599 /* clear global FSR */
1600 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1601 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1604 * Reset stream mapping groups: Initial values mark all SMRn as
1605 * invalid and all S2CRn as bypass unless overridden.
1607 for (i = 0; i < smmu->num_mapping_groups; ++i)
1608 arm_smmu_write_sme(smmu, i);
1610 if (smmu->model == ARM_MMU500) {
1612 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1613 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1614 * bit is only present in MMU-500r2 onwards.
1616 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1617 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1618 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		if (major >= 2)
			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1622 * Allow unmatched Stream IDs to allocate bypass
1623 * TLB entries for reduced latency.
1625 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
1626 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1629 /* Make sure all context banks are disabled and clear CB_FSR */
1630 for (i = 0; i < smmu->num_context_banks; ++i) {
1631 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1633 arm_smmu_write_context_bank(smmu, i);
1634 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1636 * Disable MMU-500's not-particularly-beneficial next-page
1637 * prefetcher for the sake of errata #841119 and #826419.
1639 if (smmu->model == ARM_MMU500) {
1640 reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1641 reg &= ~ARM_MMU500_ACTLR_CPRE;
1642 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1646 /* Invalidate the TLB, just in case */
1647 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1648 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1650 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1652 /* Enable fault reporting */
1653 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1655 /* Disable TLB broadcasting. */
1656 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1658 /* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;
	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;
1668 /* Don't upgrade barriers */
1669 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1671 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1672 reg |= sCR0_VMID16EN;
1674 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1675 reg |= sCR0_EXIDENABLE;
1677 /* Push the button */
1678 arm_smmu_tlb_sync_global(smmu);
1679 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1682 static int arm_smmu_id_size_to_bits(int size)
1701 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1704 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1706 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1709 dev_notice(smmu->dev, "probing hardware configuration...\n");
1710 dev_notice(smmu->dev, "SMMUv%d with:\n",
1711 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1714 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1716 /* Restrict available stages based on module parameter */
1717 if (force_stage == 1)
1718 id &= ~(ID0_S2TS | ID0_NTS);
1719 else if (force_stage == 2)
1720 id &= ~(ID0_S1TS | ID0_NTS);
1722 if (id & ID0_S1TS) {
1723 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1724 dev_notice(smmu->dev, "\tstage 1 translation\n");
1727 if (id & ID0_S2TS) {
1728 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1729 dev_notice(smmu->dev, "\tstage 2 translation\n");
1733 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1734 dev_notice(smmu->dev, "\tnested translation\n");
1737 if (!(smmu->features &
1738 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1739 dev_err(smmu->dev, "\tno translation support!\n");
1743 if ((id & ID0_S1TS) &&
1744 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1745 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1746 dev_notice(smmu->dev, "\taddress translation ops\n");
1750 * In order for DMA API calls to work properly, we must defer to what
1751 * the FW says about coherency, regardless of what the hardware claims.
1752 * Fortunately, this also opens up a workaround for systems where the
1753 * ID register value has ended up configured incorrectly.
1755 cttw_reg = !!(id & ID0_CTTW);
1756 if (cttw_fw || cttw_reg)
1757 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1758 cttw_fw ? "" : "non-");
1759 if (cttw_fw != cttw_reg)
1760 dev_notice(smmu->dev,
1761 "\t(IDR0.CTTW overridden by FW configuration)\n");
1763 /* Max. number of entries we have for stream matching/indexing */
1764 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1765 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1768 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1770 smmu->streamid_mask = size - 1;
1772 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1773 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1776 "stream-matching supported, but no SMRs present!\n");
1780 /* Zero-initialised to mark as invalid */
1781 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1786 dev_notice(smmu->dev,
1787 "\tstream matching with %lu register groups", size);
1789 /* s2cr->type == 0 means translation, so initialise explicitly */
1790 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1794 for (i = 0; i < size; i++)
1795 smmu->s2crs[i] = s2cr_init_val;
1797 smmu->num_mapping_groups = size;
1798 mutex_init(&smmu->stream_map_mutex);
1799 spin_lock_init(&smmu->global_sync_lock);
1801 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1802 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1803 if (!(id & ID0_PTFS_NO_AARCH32S))
1804 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1808 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1809 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1811 /* Check for size mismatch of SMMU address space from mapped region */
1812 size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1813 size <<= smmu->pgshift;
1814 if (smmu->cb_base != gr0_base + size)
1816 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1817 size * 2, (smmu->cb_base - gr0_base) * 2);
1819 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1820 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1821 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1822 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1825 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1826 smmu->num_context_banks, smmu->num_s2_context_banks);
1828 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
1832 if (smmu->model == CAVIUM_SMMUV2) {
1833 smmu->cavium_id_base =
1834 atomic_add_return(smmu->num_context_banks,
1835 &cavium_smmu_context_count);
1836 smmu->cavium_id_base -= smmu->num_context_banks;
1837 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1839 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1840 sizeof(*smmu->cbs), GFP_KERNEL);
1845 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1846 size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1847 smmu->ipa_size = size;
1849 /* The output mask is also applied for bypass */
1850 size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1851 smmu->pa_size = size;
1853 if (id & ID2_VMID16)
1854 smmu->features |= ARM_SMMU_FEAT_VMID16;
1857 * What the page table walker can address actually depends on which
1858 * descriptor format is in use, but since a) we don't know that yet,
1859 * and b) it can vary per context bank, this will have to do...
1861 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1863 "failed to set DMA mask for table walker\n");
1865 if (smmu->version < ARM_SMMU_V2) {
1866 smmu->va_size = smmu->ipa_size;
1867 if (smmu->version == ARM_SMMU_V1_64K)
1868 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1870 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1871 smmu->va_size = arm_smmu_id_size_to_bits(size);
1872 if (id & ID2_PTFS_4K)
1873 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1874 if (id & ID2_PTFS_16K)
1875 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1876 if (id & ID2_PTFS_64K)
1877 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1880 /* Now we've corralled the various formats, what'll it do? */
1881 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1882 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1883 if (smmu->features &
1884 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1885 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1886 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1887 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1888 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1889 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1891 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1892 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1894 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1895 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1896 smmu->pgsize_bitmap);
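	/*
	 * For instance, hardware implementing only the AArch64 4K granule
	 * ends up advertising 4K, 2M and 1G mappings here.
	 */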
1899 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1900 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1901 smmu->va_size, smmu->ipa_size);
1903 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1904 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1905 smmu->ipa_size, smmu->pa_size);
1910 struct arm_smmu_match_data {
1911 enum arm_smmu_arch_version version;
1912 enum arm_smmu_implementation model;
1915 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1916 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1918 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1919 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1920 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1921 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1922 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1923 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1925 static const struct of_device_id arm_smmu_of_match[] = {
1926 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1927 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1928 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1929 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1930 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1931 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1932 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1935 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1938 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1943 case ACPI_IORT_SMMU_V1:
1944 case ACPI_IORT_SMMU_CORELINK_MMU400:
1945 smmu->version = ARM_SMMU_V1;
1946 smmu->model = GENERIC_SMMU;
1948 case ACPI_IORT_SMMU_CORELINK_MMU401:
1949 smmu->version = ARM_SMMU_V1_64K;
1950 smmu->model = GENERIC_SMMU;
1952 case ACPI_IORT_SMMU_V2:
1953 smmu->version = ARM_SMMU_V2;
1954 smmu->model = GENERIC_SMMU;
1956 case ACPI_IORT_SMMU_CORELINK_MMU500:
1957 smmu->version = ARM_SMMU_V2;
1958 smmu->model = ARM_MMU500;
1960 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1961 smmu->version = ARM_SMMU_V2;
1962 smmu->model = CAVIUM_SMMUV2;
1971 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1972 struct arm_smmu_device *smmu)
1974 struct device *dev = smmu->dev;
1975 struct acpi_iort_node *node =
1976 *(struct acpi_iort_node **)dev_get_platdata(dev);
1977 struct acpi_iort_smmu *iort_smmu;
1980 /* Retrieve SMMU1/2 specific data */
1981 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1983 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1987 /* Ignore the configuration access interrupt */
1988 smmu->num_global_irqs = 1;
1990 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1991 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1996 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1997 struct arm_smmu_device *smmu)
2003 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2004 struct arm_smmu_device *smmu)
2006 const struct arm_smmu_match_data *data;
2007 struct device *dev = &pdev->dev;
2008 bool legacy_binding;
2010 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2011 &smmu->num_global_irqs)) {
2012 dev_err(dev, "missing #global-interrupts property\n");
2016 data = of_device_get_match_data(dev);
2017 smmu->version = data->version;
2018 smmu->model = data->model;
2020 parse_driver_options(smmu);
2022 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2023 if (legacy_binding && !using_generic_binding) {
2024 if (!using_legacy_binding)
2025 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2026 using_legacy_binding = true;
2027 } else if (!legacy_binding && !using_legacy_binding) {
2028 using_generic_binding = true;
2030 dev_err(dev, "not probing due to mismatched DT properties\n");
2034 if (of_dma_is_coherent(dev->of_node))
2035 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2040 static void arm_smmu_bus_init(void)
2042 /* Oh, for a proper bus abstraction */
2043 if (!iommu_present(&platform_bus_type))
2044 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2045 #ifdef CONFIG_ARM_AMBA
2046 if (!iommu_present(&amba_bustype))
2047 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2050 if (!iommu_present(&pci_bus_type)) {
2052 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2057 static int arm_smmu_device_probe(struct platform_device *pdev)
2059 struct resource *res;
2060 resource_size_t ioaddr;
2061 struct arm_smmu_device *smmu;
2062 struct device *dev = &pdev->dev;
2063 int num_irqs, i, err;
2065 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2067 dev_err(dev, "failed to allocate arm_smmu_device\n");
2073 err = arm_smmu_device_dt_probe(pdev, smmu);
2075 err = arm_smmu_device_acpi_probe(pdev, smmu);
2080 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2081 ioaddr = res->start;
2082 smmu->base = devm_ioremap_resource(dev, res);
2083 if (IS_ERR(smmu->base))
2084 return PTR_ERR(smmu->base);
2085 smmu->cb_base = smmu->base + resource_size(res) / 2;
2088 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2090 if (num_irqs > smmu->num_global_irqs)
2091 smmu->num_context_irqs++;
2094 if (!smmu->num_context_irqs) {
2095 dev_err(dev, "found %d interrupts but expected at least %d\n",
2096 num_irqs, smmu->num_global_irqs + 1);
2100 smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
2103 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2107 for (i = 0; i < num_irqs; ++i) {
2108 int irq = platform_get_irq(pdev, i);
2111 dev_err(dev, "failed to get irq index %d\n", i);
2114 smmu->irqs[i] = irq;
2117 err = arm_smmu_device_cfg_probe(smmu);
2121 if (smmu->version == ARM_SMMU_V2) {
2122 if (smmu->num_context_banks > smmu->num_context_irqs) {
2124 "found only %d context irq(s) but %d required\n",
2125 smmu->num_context_irqs, smmu->num_context_banks);
2129 /* Ignore superfluous interrupts */
2130 smmu->num_context_irqs = smmu->num_context_banks;
2133 for (i = 0; i < smmu->num_global_irqs; ++i) {
2134 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2135 arm_smmu_global_fault,
2137 "arm-smmu global fault",
2140 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2146 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2147 "smmu.%pa", &ioaddr);
2149 dev_err(dev, "Failed to register iommu in sysfs\n");
2153 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2154 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2156 err = iommu_device_register(&smmu->iommu);
2158 dev_err(dev, "Failed to register iommu\n");
2162 platform_set_drvdata(pdev, smmu);
2163 arm_smmu_device_reset(smmu);
2164 arm_smmu_test_smr_masks(smmu);
2167 * For ACPI and generic DT bindings, an SMMU will be probed before
2168 * any device which might need it, so we want the bus ops in place
2169 * ready to handle default domain setup as soon as any SMMU exists.
2171 if (!using_legacy_binding)
2172 arm_smmu_bus_init();
2178 * With the legacy DT binding in play, though, we have no guarantees about
2179 * probe order, but then we're also not doing default domains, so we can
2180 * delay setting bus ops until we're sure every possible SMMU is ready,
2181 * and that way ensure that no add_device() calls get missed.
2183 static int arm_smmu_legacy_bus_init(void)
2185 if (using_legacy_binding)
2186 arm_smmu_bus_init();
2189 device_initcall_sync(arm_smmu_legacy_bus_init);
2191 static int arm_smmu_device_remove(struct platform_device *pdev)
2193 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2198 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2199 dev_err(&pdev->dev, "removing device with active domains!\n");
2201 /* Turn the thing off */
2202 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2206 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2208 arm_smmu_device_remove(pdev);
2211 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2213 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2215 arm_smmu_device_reset(smmu);
2219 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2221 static struct platform_driver arm_smmu_driver = {
2224 .of_match_table = of_match_ptr(arm_smmu_of_match),
2225 .pm = &arm_smmu_pm_ops,
2227 .probe = arm_smmu_device_probe,
2228 .remove = arm_smmu_device_remove,
2229 .shutdown = arm_smmu_device_shutdown,
2231 module_platform_driver(arm_smmu_driver);
2233 IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
2234 IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
2235 IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
2236 IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
2237 IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
2238 IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
2240 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2241 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2242 MODULE_LICENSE("GPL v2");