GNU Linux-libre 4.14.251-gnu1
drivers/iommu/arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 /*
60  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
61  * global register space are still, in fact, using a hypervisor to mediate it
62  * by trapping and emulating register accesses. Sadly, some deployed versions
63  * of said trapping code have bugs wherein they go horribly wrong for stores
64  * using r31 (i.e. XZR/WZR) as the source register.
65  */
66 #define QCOM_DUMMY_VAL -1
67
68 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
69
70 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
71 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
72
73 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
74 #define TLB_SPIN_COUNT                  10
75
76 /* Maximum number of context banks per SMMU */
77 #define ARM_SMMU_MAX_CBS                128
78
79 /* SMMU global address space */
80 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
81 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
82
83 /*
84  * SMMU global address space with conditional offset to access secure
85  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
86  * nsGFSYNR0: 0x450)
87  */
88 #define ARM_SMMU_GR0_NS(smmu)                                           \
89         ((smmu)->base +                                                 \
90                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
91                         ? 0x400 : 0))
92
93 /*
94  * Some 64-bit registers only make sense to write atomically, but in such
95  * cases all the data relevant to AArch32 formats lies within the lower word,
96  * therefore this actually makes more sense than it might first appear.
97  */
98 #ifdef CONFIG_64BIT
99 #define smmu_write_atomic_lq            writeq_relaxed
100 #else
101 #define smmu_write_atomic_lq            writel_relaxed
102 #endif
103
104 /* Translation context bank */
105 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
106
107 #define MSI_IOVA_BASE                   0x8000000
108 #define MSI_IOVA_LENGTH                 0x100000
109
110 static int force_stage;
111 module_param(force_stage, int, S_IRUGO);
112 MODULE_PARM_DESC(force_stage,
113         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
114 static bool disable_bypass;
115 module_param(disable_bypass, bool, S_IRUGO);
116 MODULE_PARM_DESC(disable_bypass,
117         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
118
119 enum arm_smmu_arch_version {
120         ARM_SMMU_V1,
121         ARM_SMMU_V1_64K,
122         ARM_SMMU_V2,
123 };
124
125 enum arm_smmu_implementation {
126         GENERIC_SMMU,
127         ARM_MMU500,
128         CAVIUM_SMMUV2,
129         QCOM_SMMUV2,
130 };
131
132 /* Until ACPICA headers cover IORT rev. C */
133 #ifndef ACPI_IORT_SMMU_CORELINK_MMU401
134 #define ACPI_IORT_SMMU_CORELINK_MMU401  0x4
135 #endif
136 #ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
137 #define ACPI_IORT_SMMU_CAVIUM_THUNDERX  0x5
138 #endif
139
140 struct arm_smmu_s2cr {
141         struct iommu_group              *group;
142         int                             count;
143         enum arm_smmu_s2cr_type         type;
144         enum arm_smmu_s2cr_privcfg      privcfg;
145         u8                              cbndx;
146 };
147
148 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
149         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
150 }
151
152 struct arm_smmu_smr {
153         u16                             mask;
154         u16                             id;
155         bool                            valid;
156 };
157
158 struct arm_smmu_cb {
159         u64                             ttbr[2];
160         u32                             tcr[2];
161         u32                             mair[2];
162         struct arm_smmu_cfg             *cfg;
163 };
164
165 struct arm_smmu_master_cfg {
166         struct arm_smmu_device          *smmu;
167         s16                             smendx[];
168 };
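/*
 * Per-master configuration lives in dev->iommu_fwspec->iommu_priv. Each
 * fwspec ID is mapped to a Stream Map Entry (SME) index via smendx[], and
 * the helpers below iterate over those indices, e.g. (illustrative only,
 * see arm_smmu_master_alloc_smes() for real usage):
 *
 *        for_each_cfg_sme(fwspec, i, idx)
 *                arm_smmu_write_sme(smmu, idx);
 */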
169 #define INVALID_SMENDX                  -1
170 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
171 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
172 #define fwspec_smendx(fw, i) \
173         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
174 #define for_each_cfg_sme(fw, i, idx) \
175         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
176
177 struct arm_smmu_device {
178         struct device                   *dev;
179
180         void __iomem                    *base;
181         void __iomem                    *cb_base;
182         unsigned long                   pgshift;
183
184 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
185 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
186 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
187 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
188 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
189 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
190 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
191 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
192 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
193 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
194 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
195 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
196 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
197         u32                             features;
198
199 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
200         u32                             options;
201         enum arm_smmu_arch_version      version;
202         enum arm_smmu_implementation    model;
203
204         u32                             num_context_banks;
205         u32                             num_s2_context_banks;
206         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
207         struct arm_smmu_cb              *cbs;
208         atomic_t                        irptndx;
209
210         u32                             num_mapping_groups;
211         u16                             streamid_mask;
212         u16                             smr_mask_mask;
213         struct arm_smmu_smr             *smrs;
214         struct arm_smmu_s2cr            *s2crs;
215         struct mutex                    stream_map_mutex;
216
217         unsigned long                   va_size;
218         unsigned long                   ipa_size;
219         unsigned long                   pa_size;
220         unsigned long                   pgsize_bitmap;
221
222         u32                             num_global_irqs;
223         u32                             num_context_irqs;
224         unsigned int                    *irqs;
225
226         u32                             cavium_id_base; /* Specific to Cavium */
227
228         spinlock_t                      global_sync_lock;
229
230         /* IOMMU core code handle */
231         struct iommu_device             iommu;
232 };
233
234 enum arm_smmu_context_fmt {
235         ARM_SMMU_CTX_FMT_NONE,
236         ARM_SMMU_CTX_FMT_AARCH64,
237         ARM_SMMU_CTX_FMT_AARCH32_L,
238         ARM_SMMU_CTX_FMT_AARCH32_S,
239 };
240
241 struct arm_smmu_cfg {
242         u8                              cbndx;
243         u8                              irptndx;
244         union {
245                 u16                     asid;
246                 u16                     vmid;
247         };
248         u32                             cbar;
249         enum arm_smmu_context_fmt       fmt;
250 };
251 #define INVALID_IRPTNDX                 0xff
252
253 enum arm_smmu_domain_stage {
254         ARM_SMMU_DOMAIN_S1 = 0,
255         ARM_SMMU_DOMAIN_S2,
256         ARM_SMMU_DOMAIN_NESTED,
257         ARM_SMMU_DOMAIN_BYPASS,
258 };
259
260 struct arm_smmu_domain {
261         struct arm_smmu_device          *smmu;
262         struct io_pgtable_ops           *pgtbl_ops;
263         struct arm_smmu_cfg             cfg;
264         enum arm_smmu_domain_stage      stage;
265         struct mutex                    init_mutex; /* Protects smmu pointer */
266         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
267         struct iommu_domain             domain;
268 };
269
270 struct arm_smmu_option_prop {
271         u32 opt;
272         const char *prop;
273 };
274
275 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
276
277 static bool using_legacy_binding, using_generic_binding;
278
279 static struct arm_smmu_option_prop arm_smmu_options[] = {
280         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
281         { 0, NULL},
282 };
283
284 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
285 {
286         return container_of(dom, struct arm_smmu_domain, domain);
287 }
288
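/* Latch any DT-specified implementation quirks into smmu->options. */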
289 static void parse_driver_options(struct arm_smmu_device *smmu)
290 {
291         int i = 0;
292
293         do {
294                 if (of_property_read_bool(smmu->dev->of_node,
295                                                 arm_smmu_options[i].prop)) {
296                         smmu->options |= arm_smmu_options[i].opt;
297                         dev_notice(smmu->dev, "option %s\n",
298                                 arm_smmu_options[i].prop);
299                 }
300         } while (arm_smmu_options[++i].opt);
301 }
302
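/*
 * For the legacy "mmu-masters" binding, PCI masters are described via the
 * host bridge's OF node, so walk up to the root bus; other devices simply
 * use their own OF node.
 */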
303 static struct device_node *dev_get_dev_node(struct device *dev)
304 {
305         if (dev_is_pci(dev)) {
306                 struct pci_bus *bus = to_pci_dev(dev)->bus;
307
308                 while (!pci_is_root_bus(bus))
309                         bus = bus->parent;
310                 return of_node_get(bus->bridge->parent->of_node);
311         }
312
313         return of_node_get(dev->of_node);
314 }
315
316 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
317 {
318         *((__be32 *)data) = cpu_to_be32(alias);
319         return 0; /* Continue walking */
320 }
321
322 static int __find_legacy_master_phandle(struct device *dev, void *data)
323 {
324         struct of_phandle_iterator *it = *(void **)data;
325         struct device_node *np = it->node;
326         int err;
327
328         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
329                             "#stream-id-cells", 0)
330                 if (it->node == np) {
331                         *(void **)data = dev;
332                         return 1;
333                 }
334         it->node = np;
335         return err == -ENOENT ? 0 : err;
336 }
337
338 static struct platform_driver arm_smmu_driver;
339 static struct iommu_ops arm_smmu_ops;
340
341 static int arm_smmu_register_legacy_master(struct device *dev,
342                                            struct arm_smmu_device **smmu)
343 {
344         struct device *smmu_dev;
345         struct device_node *np;
346         struct of_phandle_iterator it;
347         void *data = &it;
348         u32 *sids;
349         __be32 pci_sid;
350         int err;
351
352         np = dev_get_dev_node(dev);
353         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
354                 of_node_put(np);
355                 return -ENODEV;
356         }
357
358         it.node = np;
359         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
360                                      __find_legacy_master_phandle);
361         smmu_dev = data;
362         of_node_put(np);
363         if (err == 0)
364                 return -ENODEV;
365         if (err < 0)
366                 return err;
367
368         if (dev_is_pci(dev)) {
369                 /* "mmu-masters" assumes Stream ID == Requester ID */
370                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
371                                        &pci_sid);
372                 it.cur = &pci_sid;
373                 it.cur_count = 1;
374         }
375
376         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
377                                 &arm_smmu_ops);
378         if (err)
379                 return err;
380
381         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
382         if (!sids)
383                 return -ENOMEM;
384
385         *smmu = dev_get_drvdata(smmu_dev);
386         of_phandle_iterator_args(&it, sids, it.cur_count);
387         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
388         kfree(sids);
389         return err;
390 }
391
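/*
 * Claim the first free bit in [start, end) of the given bitmap.
 * test_and_set_bit() closes the race with concurrent allocators; returns
 * the index on success or -ENOSPC if the range is exhausted.
 */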
392 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
393 {
394         int idx;
395
396         do {
397                 idx = find_next_zero_bit(map, end, start);
398                 if (idx == end)
399                         return -ENOSPC;
400         } while (test_and_set_bit(idx, map));
401
402         return idx;
403 }
404
405 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
406 {
407         clear_bit(idx, map);
408 }
409
410 /* Wait for any pending TLB invalidations to complete */
411 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
412                                 void __iomem *sync, void __iomem *status)
413 {
414         unsigned int spin_cnt, delay;
415
416         writel_relaxed(QCOM_DUMMY_VAL, sync);
417         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
418                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
419                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
420                                 return;
421                         cpu_relax();
422                 }
423                 udelay(delay);
424         }
425         dev_err_ratelimited(smmu->dev,
426                             "TLB sync timed out -- SMMU may be deadlocked\n");
427 }
428
429 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
430 {
431         void __iomem *base = ARM_SMMU_GR0(smmu);
432         unsigned long flags;
433
434         spin_lock_irqsave(&smmu->global_sync_lock, flags);
435         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
436                             base + ARM_SMMU_GR0_sTLBGSTATUS);
437         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
438 }
439
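/*
 * Per-context TLB sync: issue TLBSYNC in the context bank and poll its
 * TLBSTATUS until the pending invalidations have drained.
 */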
440 static void arm_smmu_tlb_sync_context(void *cookie)
441 {
442         struct arm_smmu_domain *smmu_domain = cookie;
443         struct arm_smmu_device *smmu = smmu_domain->smmu;
444         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
445         unsigned long flags;
446
447         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
448         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
449                             base + ARM_SMMU_CB_TLBSTATUS);
450         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
451 }
452
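/*
 * The SMMUv1 stage-2 path invalidates by VMID via the global TLBIVMID
 * register, so the matching sync is the global one.
 */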
453 static void arm_smmu_tlb_sync_vmid(void *cookie)
454 {
455         struct arm_smmu_domain *smmu_domain = cookie;
456
457         arm_smmu_tlb_sync_global(smmu_domain->smmu);
458 }
459
460 static void arm_smmu_tlb_inv_context_s1(void *cookie)
461 {
462         struct arm_smmu_domain *smmu_domain = cookie;
463         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
464         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
465
466         writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
467         arm_smmu_tlb_sync_context(cookie);
468 }
469
470 static void arm_smmu_tlb_inv_context_s2(void *cookie)
471 {
472         struct arm_smmu_domain *smmu_domain = cookie;
473         struct arm_smmu_device *smmu = smmu_domain->smmu;
474         void __iomem *base = ARM_SMMU_GR0(smmu);
475
476         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
477         arm_smmu_tlb_sync_global(smmu);
478 }
479
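/*
 * Queue TLB invalidations for an IOVA range without waiting for them to
 * complete. Stage 1 targets TLBIVA(L) with the ASID encoded alongside the
 * address; stage 2 targets TLBIIPAS2(L) with the IPA shifted down by 12.
 * The caller is expected to follow up with a TLB sync.
 */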
480 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
481                                           size_t granule, bool leaf, void *cookie)
482 {
483         struct arm_smmu_domain *smmu_domain = cookie;
484         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
485         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
486         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
487
488         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
489                 wmb();
490
491         if (stage1) {
492                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
493
494                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
495                         iova &= ~12UL;
496                         iova |= cfg->asid;
497                         do {
498                                 writel_relaxed(iova, reg);
499                                 iova += granule;
500                         } while (size -= granule);
501                 } else {
502                         iova >>= 12;
503                         iova |= (u64)cfg->asid << 48;
504                         do {
505                                 writeq_relaxed(iova, reg);
506                                 iova += granule >> 12;
507                         } while (size -= granule);
508                 }
509         } else {
510                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
511                               ARM_SMMU_CB_S2_TLBIIPAS2;
512                 iova >>= 12;
513                 do {
514                         smmu_write_atomic_lq(iova, reg);
515                         iova += granule >> 12;
516                 } while (size -= granule);
517         }
518 }
519
520 /*
521  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
522  * almost negligible, but the benefit of getting the first one in as far ahead
523  * of the sync as possible is significant, hence we don't just make this a
524  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
525  */
526 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
527                                          size_t granule, bool leaf, void *cookie)
528 {
529         struct arm_smmu_domain *smmu_domain = cookie;
530         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
531
532         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
533                 wmb();
534
535         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
536 }
537
538 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
539         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
540         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
541         .tlb_sync       = arm_smmu_tlb_sync_context,
542 };
543
544 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
545         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
546         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
547         .tlb_sync       = arm_smmu_tlb_sync_context,
548 };
549
550 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
551         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
552         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
553         .tlb_sync       = arm_smmu_tlb_sync_vmid,
554 };
555
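/*
 * Context fault handler: report the faulting IOVA and syndrome, then
 * clear the fault status register.
 */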
556 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
557 {
558         u32 fsr, fsynr;
559         unsigned long iova;
560         struct iommu_domain *domain = dev;
561         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
562         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
563         struct arm_smmu_device *smmu = smmu_domain->smmu;
564         void __iomem *cb_base;
565
566         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
567         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
568
569         if (!(fsr & FSR_FAULT))
570                 return IRQ_NONE;
571
572         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
573         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
574
575         dev_err_ratelimited(smmu->dev,
576         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
577                             fsr, iova, fsynr, cfg->cbndx);
578
579         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
580         return IRQ_HANDLED;
581 }
582
583 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
584 {
585         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
586         struct arm_smmu_device *smmu = dev;
587         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
588
589         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
590         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
591         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
592         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
593
594         if (!gfsr)
595                 return IRQ_NONE;
596
597         dev_err_ratelimited(smmu->dev,
598                 "Unexpected global fault, this could be serious\n");
599         dev_err_ratelimited(smmu->dev,
600                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
601                 gfsr, gfsynr0, gfsynr1, gfsynr2);
602
603         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
604         return IRQ_HANDLED;
605 }
606
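/*
 * Capture the io-pgtable configuration into the shadow arm_smmu_cb; the
 * values are pushed to the hardware by arm_smmu_write_context_bank().
 */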
607 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
608                                        struct io_pgtable_cfg *pgtbl_cfg)
609 {
610         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
611         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
612         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
613
614         cb->cfg = cfg;
615
616         /* TTBCR */
617         if (stage1) {
618                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
619                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
620                 } else {
621                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
622                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
623                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
624                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
625                                 cb->tcr[1] |= TTBCR2_AS;
626                 }
627         } else {
628                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
629         }
630
631         /* TTBRs */
632         if (stage1) {
633                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
634                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
635                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
636                 } else {
637                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
638                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
639                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
640                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
641                 }
642         } else {
643                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
644         }
645
646         /* MAIRs (stage-1 only) */
647         if (stage1) {
648                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
649                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
650                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
651                 } else {
652                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
653                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
654                 }
655         }
656 }
657
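/*
 * Program context bank @idx from its shadow state, or just clear SCTLR to
 * disable the bank if it has no configuration attached.
 */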
658 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
659 {
660         u32 reg;
661         bool stage1;
662         struct arm_smmu_cb *cb = &smmu->cbs[idx];
663         struct arm_smmu_cfg *cfg = cb->cfg;
664         void __iomem *cb_base, *gr1_base;
665
666         cb_base = ARM_SMMU_CB(smmu, idx);
667
668         /* Unassigned context banks only need disabling */
669         if (!cfg) {
670                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
671                 return;
672         }
673
674         gr1_base = ARM_SMMU_GR1(smmu);
675         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
676
677         /* CBA2R */
678         if (smmu->version > ARM_SMMU_V1) {
679                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
680                         reg = CBA2R_RW64_64BIT;
681                 else
682                         reg = CBA2R_RW64_32BIT;
683                 /* 16-bit VMIDs live in CBA2R */
684                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
685                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
686
687                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
688         }
689
690         /* CBAR */
691         reg = cfg->cbar;
692         if (smmu->version < ARM_SMMU_V2)
693                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
694
695         /*
696          * Use the weakest shareability/memory types, so they are
697          * overridden by the ttbcr/pte.
698          */
699         if (stage1) {
700                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
701                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
702         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
703                 /* 8-bit VMIDs live in CBAR */
704                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
705         }
706         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
707
708         /*
709          * TTBCR
710          * We must write this before the TTBRs, since it determines the
711          * access behaviour of some fields (in particular, ASID[15:8]).
712          */
713         if (stage1 && smmu->version > ARM_SMMU_V1)
714                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
715         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
716
717         /* TTBRs */
718         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
719                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
720                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
721                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
722         } else {
723                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
724                 if (stage1)
725                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
726         }
727
728         /* MAIRs (stage-1 only) */
729         if (stage1) {
730                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
731                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
732         }
733
734         /* SCTLR */
735         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
736         if (stage1)
737                 reg |= SCTLR_S1_ASIDPNE;
738         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
739                 reg |= SCTLR_E;
740
741         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
742 }
743
744 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
745                                         struct arm_smmu_device *smmu)
746 {
747         int irq, start, ret = 0;
748         unsigned long ias, oas;
749         struct io_pgtable_ops *pgtbl_ops;
750         struct io_pgtable_cfg pgtbl_cfg;
751         enum io_pgtable_fmt fmt;
752         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
753         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
754         const struct iommu_gather_ops *tlb_ops;
755
756         mutex_lock(&smmu_domain->init_mutex);
757         if (smmu_domain->smmu)
758                 goto out_unlock;
759
760         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
761                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
762                 smmu_domain->smmu = smmu;
763                 goto out_unlock;
764         }
765
766         /*
767          * Mapping the requested stage onto what we support is surprisingly
768          * complicated, mainly because the spec allows S1+S2 SMMUs without
769          * support for nested translation. That means we end up with the
770          * following table:
771          *
772          * Requested        Supported        Actual
773          *     S1               N              S1
774          *     S1             S1+S2            S1
775          *     S1               S2             S2
776          *     S1               S1             S1
777          *     N                N              N
778          *     N              S1+S2            S2
779          *     N                S2             S2
780          *     N                S1             S1
781          *
782          * Note that you can't actually request stage-2 mappings.
783          */
784         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
785                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
786         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
787                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
788
789         /*
790          * Choosing a suitable context format is even more fiddly. Until we
791          * grow some way for the caller to express a preference, and/or move
792          * the decision into the io-pgtable code where it arguably belongs,
793          * just aim for the closest thing to the rest of the system, and hope
794          * that the hardware isn't esoteric enough that we can't assume AArch64
795          * support to be a superset of AArch32 support...
796          */
797         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
798                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
799         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
800             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
801             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
802             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
803                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
804         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
805             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
806                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
807                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
808                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
809
810         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
811                 ret = -EINVAL;
812                 goto out_unlock;
813         }
814
815         switch (smmu_domain->stage) {
816         case ARM_SMMU_DOMAIN_S1:
817                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
818                 start = smmu->num_s2_context_banks;
819                 ias = smmu->va_size;
820                 oas = smmu->ipa_size;
821                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
822                         fmt = ARM_64_LPAE_S1;
823                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
824                         fmt = ARM_32_LPAE_S1;
825                         ias = min(ias, 32UL);
826                         oas = min(oas, 40UL);
827                 } else {
828                         fmt = ARM_V7S;
829                         ias = min(ias, 32UL);
830                         oas = min(oas, 32UL);
831                 }
832                 tlb_ops = &arm_smmu_s1_tlb_ops;
833                 break;
834         case ARM_SMMU_DOMAIN_NESTED:
835                 /*
836                  * We will likely want to change this if/when KVM gets
837                  * involved.
838                  */
839         case ARM_SMMU_DOMAIN_S2:
840                 cfg->cbar = CBAR_TYPE_S2_TRANS;
841                 start = 0;
842                 ias = smmu->ipa_size;
843                 oas = smmu->pa_size;
844                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
845                         fmt = ARM_64_LPAE_S2;
846                 } else {
847                         fmt = ARM_32_LPAE_S2;
848                         ias = min(ias, 40UL);
849                         oas = min(oas, 40UL);
850                 }
851                 if (smmu->version == ARM_SMMU_V2)
852                         tlb_ops = &arm_smmu_s2_tlb_ops_v2;
853                 else
854                         tlb_ops = &arm_smmu_s2_tlb_ops_v1;
855                 break;
856         default:
857                 ret = -EINVAL;
858                 goto out_unlock;
859         }
860         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
861                                       smmu->num_context_banks);
862         if (ret < 0)
863                 goto out_unlock;
864
865         cfg->cbndx = ret;
866         if (smmu->version < ARM_SMMU_V2) {
867                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
868                 cfg->irptndx %= smmu->num_context_irqs;
869         } else {
870                 cfg->irptndx = cfg->cbndx;
871         }
872
873         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
874                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
875         else
876                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
877
878         pgtbl_cfg = (struct io_pgtable_cfg) {
879                 .pgsize_bitmap  = smmu->pgsize_bitmap,
880                 .ias            = ias,
881                 .oas            = oas,
882                 .tlb            = tlb_ops,
883                 .iommu_dev      = smmu->dev,
884         };
885
886         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
887                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
888
889         smmu_domain->smmu = smmu;
890         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
891         if (!pgtbl_ops) {
892                 ret = -ENOMEM;
893                 goto out_clear_smmu;
894         }
895
896         /* Update the domain's page sizes to reflect the page table format */
897         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
898         domain->geometry.aperture_end = (1UL << ias) - 1;
899         domain->geometry.force_aperture = true;
900
901         /* Initialise the context bank with our page table cfg */
902         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
903         arm_smmu_write_context_bank(smmu, cfg->cbndx);
904
905         /*
906          * Request context fault interrupt. Do this last to avoid the
907          * handler seeing a half-initialised domain state.
908          */
909         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
910         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
911                                IRQF_SHARED, "arm-smmu-context-fault", domain);
912         if (ret < 0) {
913                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
914                         cfg->irptndx, irq);
915                 cfg->irptndx = INVALID_IRPTNDX;
916         }
917
918         mutex_unlock(&smmu_domain->init_mutex);
919
920         /* Publish page table ops for map/unmap */
921         smmu_domain->pgtbl_ops = pgtbl_ops;
922         return 0;
923
924 out_clear_smmu:
925         smmu_domain->smmu = NULL;
926 out_unlock:
927         mutex_unlock(&smmu_domain->init_mutex);
928         return ret;
929 }
930
931 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
932 {
933         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
934         struct arm_smmu_device *smmu = smmu_domain->smmu;
935         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
936         int irq;
937
938         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
939                 return;
940
941         /*
942          * Disable the context bank and free the page tables before
943          * releasing the context bank index itself.
944          */
945         smmu->cbs[cfg->cbndx].cfg = NULL;
946         arm_smmu_write_context_bank(smmu, cfg->cbndx);
947
948         if (cfg->irptndx != INVALID_IRPTNDX) {
949                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
950                 devm_free_irq(smmu->dev, irq, domain);
951         }
952
953         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
954         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
955 }
956
957 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
958 {
959         struct arm_smmu_domain *smmu_domain;
960
961         if (type != IOMMU_DOMAIN_UNMANAGED &&
962             type != IOMMU_DOMAIN_DMA &&
963             type != IOMMU_DOMAIN_IDENTITY)
964                 return NULL;
965         /*
966          * Allocate the domain and initialise some of its data structures.
967          * We can't really do anything meaningful until we've added a
968          * master.
969          */
970         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
971         if (!smmu_domain)
972                 return NULL;
973
974         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
975             iommu_get_dma_cookie(&smmu_domain->domain))) {
976                 kfree(smmu_domain);
977                 return NULL;
978         }
979
980         mutex_init(&smmu_domain->init_mutex);
981         spin_lock_init(&smmu_domain->cb_lock);
982
983         return &smmu_domain->domain;
984 }
985
986 static void arm_smmu_domain_free(struct iommu_domain *domain)
987 {
988         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
989
990         /*
991          * Free the domain resources. We assume that all devices have
992          * already been detached.
993          */
994         iommu_put_dma_cookie(domain);
995         arm_smmu_destroy_domain_context(domain);
996         kfree(smmu_domain);
997 }
998
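/*
 * Program SMR @idx from the shadow state. When the Extended Stream ID
 * feature is in use, validity is signalled via S2CR.EXIDVALID instead of
 * SMR_VALID (see arm_smmu_write_s2cr()).
 */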
999 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1000 {
1001         struct arm_smmu_smr *smr = smmu->smrs + idx;
1002         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
1003
1004         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1005                 reg |= SMR_VALID;
1006         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1007 }
1008
1009 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1010 {
1011         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1012         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1013                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1014                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1015
1016         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1017             smmu->smrs[idx].valid)
1018                 reg |= S2CR_EXIDVALID;
1019         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1020 }
1021
1022 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1023 {
1024         arm_smmu_write_s2cr(smmu, idx);
1025         if (smmu->smrs)
1026                 arm_smmu_write_smr(smmu, idx);
1027 }
1028
1029 /*
1030  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1031  * should be called after sCR0 is written.
1032  */
1033 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1034 {
1035         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1036         u32 smr;
1037
1038         if (!smmu->smrs)
1039                 return;
1040
1041         /*
1042          * SMR.ID bits may not be preserved if the corresponding MASK
1043          * bits are set, so check each one separately. We can reject
1044          * masters later if they try to claim IDs outside these masks.
1045          */
1046         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1047         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1048         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1049         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1050
1051         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1052         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1053         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1054         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1055 }
1056
1057 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1058 {
1059         struct arm_smmu_smr *smrs = smmu->smrs;
1060         int i, free_idx = -ENOSPC;
1061
1062         /* Stream indexing is blissfully easy */
1063         if (!smrs)
1064                 return id;
1065
1066         /* Validating SMRs is... less so */
1067         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1068                 if (!smrs[i].valid) {
1069                         /*
1070                          * Note the first free entry we come across, which
1071                          * we'll claim in the end if nothing else matches.
1072                          */
1073                         if (free_idx < 0)
1074                                 free_idx = i;
1075                         continue;
1076                 }
1077                 /*
1078                  * If the new entry is _entirely_ matched by an existing entry,
1079                  * then reuse that, with the guarantee that there also cannot
1080                  * be any subsequent conflicting entries. In normal use we'd
1081                  * expect simply identical entries for this case, but there's
1082                  * no harm in accommodating the generalisation.
1083                  */
1084                 if ((mask & smrs[i].mask) == mask &&
1085                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1086                         return i;
1087                 /*
1088                  * If the new entry has any other overlap with an existing one,
1089                  * though, then there always exists at least one stream ID
1090                  * which would cause a conflict, and we can't allow that risk.
1091                  */
1092                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1093                         return -EINVAL;
1094         }
1095
1096         return free_idx;
1097 }
1098
1099 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1100 {
1101         if (--smmu->s2crs[idx].count)
1102                 return false;
1103
1104         smmu->s2crs[idx] = s2cr_init_val;
1105         if (smmu->smrs)
1106                 smmu->smrs[idx].valid = false;
1107
1108         return true;
1109 }
1110
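/*
 * Allocate and program stream map entries for every ID in the master's
 * fwspec. Each fwspec ID carries the StreamID in its low bits with an
 * optional SMR mask above SMR_MASK_SHIFT, i.e. (mask << SMR_MASK_SHIFT) | sid.
 */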
1111 static int arm_smmu_master_alloc_smes(struct device *dev)
1112 {
1113         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1114         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1115         struct arm_smmu_device *smmu = cfg->smmu;
1116         struct arm_smmu_smr *smrs = smmu->smrs;
1117         struct iommu_group *group;
1118         int i, idx, ret;
1119
1120         mutex_lock(&smmu->stream_map_mutex);
1121         /* Figure out a viable stream map entry allocation */
1122         for_each_cfg_sme(fwspec, i, idx) {
1123                 u16 sid = fwspec->ids[i];
1124                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1125
1126                 if (idx != INVALID_SMENDX) {
1127                         ret = -EEXIST;
1128                         goto out_err;
1129                 }
1130
1131                 ret = arm_smmu_find_sme(smmu, sid, mask);
1132                 if (ret < 0)
1133                         goto out_err;
1134
1135                 idx = ret;
1136                 if (smrs && smmu->s2crs[idx].count == 0) {
1137                         smrs[idx].id = sid;
1138                         smrs[idx].mask = mask;
1139                         smrs[idx].valid = true;
1140                 }
1141                 smmu->s2crs[idx].count++;
1142                 cfg->smendx[i] = (s16)idx;
1143         }
1144
1145         group = iommu_group_get_for_dev(dev);
1146         if (!group)
1147                 group = ERR_PTR(-ENOMEM);
1148         if (IS_ERR(group)) {
1149                 ret = PTR_ERR(group);
1150                 goto out_err;
1151         }
1152         iommu_group_put(group);
1153
1154         /* It worked! Now, poke the actual hardware */
1155         for_each_cfg_sme(fwspec, i, idx) {
1156                 arm_smmu_write_sme(smmu, idx);
1157                 smmu->s2crs[idx].group = group;
1158         }
1159
1160         mutex_unlock(&smmu->stream_map_mutex);
1161         return 0;
1162
1163 out_err:
1164         while (i--) {
1165                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1166                 cfg->smendx[i] = INVALID_SMENDX;
1167         }
1168         mutex_unlock(&smmu->stream_map_mutex);
1169         return ret;
1170 }
1171
1172 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1173 {
1174         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1175         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1176         int i, idx;
1177
1178         mutex_lock(&smmu->stream_map_mutex);
1179         for_each_cfg_sme(fwspec, i, idx) {
1180                 if (arm_smmu_free_sme(smmu, idx))
1181                         arm_smmu_write_sme(smmu, idx);
1182                 cfg->smendx[i] = INVALID_SMENDX;
1183         }
1184         mutex_unlock(&smmu->stream_map_mutex);
1185 }
1186
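/*
 * Point every stream map entry of the master at the domain's context bank
 * (or set it to bypass for an identity domain).
 */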
1187 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1188                                       struct iommu_fwspec *fwspec)
1189 {
1190         struct arm_smmu_device *smmu = smmu_domain->smmu;
1191         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1192         u8 cbndx = smmu_domain->cfg.cbndx;
1193         enum arm_smmu_s2cr_type type;
1194         int i, idx;
1195
1196         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1197                 type = S2CR_TYPE_BYPASS;
1198         else
1199                 type = S2CR_TYPE_TRANS;
1200
1201         for_each_cfg_sme(fwspec, i, idx) {
1202                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1203                         continue;
1204
1205                 s2cr[idx].type = type;
1206                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1207                 s2cr[idx].cbndx = cbndx;
1208                 arm_smmu_write_s2cr(smmu, idx);
1209         }
1210         return 0;
1211 }
1212
1213 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1214 {
1215         int ret;
1216         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1217         struct arm_smmu_device *smmu;
1218         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1219
1220         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1221                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1222                 return -ENXIO;
1223         }
1224
1225         /*
1226          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1227          * domains between of_xlate() and add_device() - we have no way to cope
1228          * with that, so until ARM gets converted to rely on groups and default
1229          * domains, just say no (but more politely than by dereferencing NULL).
1230          * This should be at least a WARN_ON once that's sorted.
1231          */
1232         if (!fwspec->iommu_priv)
1233                 return -ENODEV;
1234
1235         smmu = fwspec_smmu(fwspec);
1236         /* Ensure that the domain is finalised */
1237         ret = arm_smmu_init_domain_context(domain, smmu);
1238         if (ret < 0)
1239                 return ret;
1240
1241         /*
1242          * Sanity check the domain. We don't support domains across
1243          * different SMMUs.
1244          */
1245         if (smmu_domain->smmu != smmu) {
1246                 dev_err(dev,
1247                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1248                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1249                 return -EINVAL;
1250         }
1251
1252         /* Looks ok, so add the device to the domain */
1253         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1254 }
1255
1256 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1257                         phys_addr_t paddr, size_t size, int prot)
1258 {
1259         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1260
1261         if (!ops)
1262                 return -ENODEV;
1263
1264         return ops->map(ops, iova, paddr, size, prot);
1265 }
1266
1267 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1268                              size_t size)
1269 {
1270         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1271
1272         if (!ops)
1273                 return 0;
1274
1275         return ops->unmap(ops, iova, size);
1276 }
1277
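/*
 * Resolve an IOVA with the hardware ATS1PR translation operation, falling
 * back to a software page table walk if the ATSR poll times out.
 */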
1278 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1279                                               dma_addr_t iova)
1280 {
1281         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1282         struct arm_smmu_device *smmu = smmu_domain->smmu;
1283         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1284         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1285         struct device *dev = smmu->dev;
1286         void __iomem *cb_base;
1287         u32 tmp;
1288         u64 phys;
1289         unsigned long va, flags;
1290
1291         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1292
1293         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1294         /* ATS1 registers can only be written atomically */
1295         va = iova & ~0xfffUL;
1296         if (smmu->version == ARM_SMMU_V2)
1297                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1298         else /* Register is only 32-bit in v1 */
1299                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1300
1301         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1302                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1303                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1304                 dev_err(dev,
1305                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1306                         &iova);
1307                 return ops->iova_to_phys(ops, iova);
1308         }
1309
1310         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1311         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1312         if (phys & CB_PAR_F) {
1313                 dev_err(dev, "translation fault!\n");
1314                 dev_err(dev, "PAR = 0x%llx\n", phys);
1315                 return 0;
1316         }
1317
1318         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1319 }
1320
1321 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1322                                         dma_addr_t iova)
1323 {
1324         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1325         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1326
1327         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1328                 return iova;
1329
1330         if (!ops)
1331                 return 0;
1332
1333         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1334                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1335                 return arm_smmu_iova_to_phys_hard(domain, iova);
1336
1337         return ops->iova_to_phys(ops, iova);
1338 }
1339
1340 static bool arm_smmu_capable(enum iommu_cap cap)
1341 {
1342         switch (cap) {
1343         case IOMMU_CAP_CACHE_COHERENCY:
1344                 /*
1345                  * Return true here as the SMMU can always send out coherent
1346                  * requests.
1347                  */
1348                 return true;
1349         case IOMMU_CAP_NOEXEC:
1350                 return true;
1351         default:
1352                 return false;
1353         }
1354 }
1355
1356 static int arm_smmu_match_node(struct device *dev, void *data)
1357 {
1358         return dev->fwnode == data;
1359 }
1360
1361 static
1362 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1363 {
1364         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1365                                                 fwnode, arm_smmu_match_node);
1366         put_device(dev);
1367         return dev ? dev_get_drvdata(dev) : NULL;
1368 }
1369
1370 static int arm_smmu_add_device(struct device *dev)
1371 {
1372         struct arm_smmu_device *smmu;
1373         struct arm_smmu_master_cfg *cfg;
1374         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1375         int i, ret;
1376
1377         if (using_legacy_binding) {
1378                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1379
1380                 /*
1381                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1382                  * will allocate/initialise a new one. Thus we need to update fwspec for
1383                  * later use.
1384                  */
1385                 fwspec = dev->iommu_fwspec;
1386                 if (ret)
1387                         goto out_free;
1388         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1389                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1390         } else {
1391                 return -ENODEV;
1392         }
1393
1394         ret = -EINVAL;
1395         for (i = 0; i < fwspec->num_ids; i++) {
1396                 u16 sid = fwspec->ids[i];
1397                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1398
1399                 if (sid & ~smmu->streamid_mask) {
1400                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1401                                 sid, smmu->streamid_mask);
1402                         goto out_free;
1403                 }
1404                 if (mask & ~smmu->smr_mask_mask) {
1405                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1406                                 mask, smmu->smr_mask_mask);
1407                         goto out_free;
1408                 }
1409         }
1410
1411         ret = -ENOMEM;
1412         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1413                       GFP_KERNEL);
1414         if (!cfg)
1415                 goto out_free;
1416
1417         cfg->smmu = smmu;
1418         fwspec->iommu_priv = cfg;
1419         while (i--)
1420                 cfg->smendx[i] = INVALID_SMENDX;
1421
1422         ret = arm_smmu_master_alloc_smes(dev);
1423         if (ret)
1424                 goto out_cfg_free;
1425
1426         iommu_device_link(&smmu->iommu, dev);
1427
1428         return 0;
1429
1430 out_cfg_free:
1431         kfree(cfg);
1432 out_free:
1433         iommu_fwspec_free(dev);
1434         return ret;
1435 }
1436
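/*
 * Undo arm_smmu_add_device(): release the master's stream mapping entries
 * and free its per-device configuration and fwspec.
 */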
1437 static void arm_smmu_remove_device(struct device *dev)
1438 {
1439         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1440         struct arm_smmu_master_cfg *cfg;
1441         struct arm_smmu_device *smmu;
1442
1443
1444         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1445                 return;
1446
1447         cfg  = fwspec->iommu_priv;
1448         smmu = cfg->smmu;
1449
1450         iommu_device_unlink(&smmu->iommu, dev);
1451         arm_smmu_master_free_smes(fwspec);
1452         iommu_group_remove_device(dev);
1453         kfree(fwspec->iommu_priv);
1454         iommu_fwspec_free(dev);
1455 }
1456
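/*
 * Masters that alias onto the same stream mapping entries must share an
 * IOMMU group: reuse any group already associated with our S2CRs, and only
 * fall back to the default PCI/platform grouping if none exists.
 */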
1457 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1458 {
1459         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1460         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1461         struct iommu_group *group = NULL;
1462         int i, idx;
1463
1464         for_each_cfg_sme(fwspec, i, idx) {
1465                 if (group && smmu->s2crs[idx].group &&
1466                     group != smmu->s2crs[idx].group)
1467                         return ERR_PTR(-EINVAL);
1468
1469                 group = smmu->s2crs[idx].group;
1470         }
1471
1472         if (group)
1473                 return iommu_group_ref_get(group);
1474
1475         if (dev_is_pci(dev))
1476                 group = pci_device_group(dev);
1477         else
1478                 group = generic_device_group(dev);
1479
1480         return group;
1481 }
1482
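/*
 * The only domain attribute supported is DOMAIN_ATTR_NESTING, which selects
 * between stage-1 and nested (stage-1 followed by stage-2) translation for
 * unmanaged domains; it can only be changed before the domain is attached.
 */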
1483 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1484                                     enum iommu_attr attr, void *data)
1485 {
1486         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1487
1488         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1489                 return -EINVAL;
1490
1491         switch (attr) {
1492         case DOMAIN_ATTR_NESTING:
1493                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1494                 return 0;
1495         default:
1496                 return -ENODEV;
1497         }
1498 }
1499
1500 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1501                                     enum iommu_attr attr, void *data)
1502 {
1503         int ret = 0;
1504         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1505
1506         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1507                 return -EINVAL;
1508
1509         mutex_lock(&smmu_domain->init_mutex);
1510
1511         switch (attr) {
1512         case DOMAIN_ATTR_NESTING:
1513                 if (smmu_domain->smmu) {
1514                         ret = -EPERM;
1515                         goto out_unlock;
1516                 }
1517
1518                 if (*(int *)data)
1519                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1520                 else
1521                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1522
1523                 break;
1524         default:
1525                 ret = -ENODEV;
1526         }
1527
1528 out_unlock:
1529         mutex_unlock(&smmu_domain->init_mutex);
1530         return ret;
1531 }
1532
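/*
 * Translate a DT "iommus" specifier into a firmware ID: the Stream ID goes
 * in the lower 16 bits and an optional SMR mask (second cell, or the
 * "stream-match-mask" property) in the upper 16 bits.
 */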
1533 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1534 {
1535         u32 mask, fwid = 0;
1536
1537         if (args->args_count > 0)
1538                 fwid |= (u16)args->args[0];
1539
1540         if (args->args_count > 1)
1541                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1542         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1543                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1544
1545         return iommu_fwspec_add_ids(dev, &fwid, 1);
1546 }
1547
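/*
 * Reserve a software-managed MSI window (MSI_IOVA_BASE/MSI_IOVA_LENGTH) so
 * that DMA domains keep an IOVA range free for mapping MSI doorbells, and
 * add any further reserved regions known to the DMA layer.
 */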
1548 static void arm_smmu_get_resv_regions(struct device *dev,
1549                                       struct list_head *head)
1550 {
1551         struct iommu_resv_region *region;
1552         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1553
1554         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1555                                          prot, IOMMU_RESV_SW_MSI);
1556         if (!region)
1557                 return;
1558
1559         list_add_tail(&region->list, head);
1560
1561         iommu_dma_get_resv_regions(dev, head);
1562 }
1563
1564 static void arm_smmu_put_resv_regions(struct device *dev,
1565                                       struct list_head *head)
1566 {
1567         struct iommu_resv_region *entry, *next;
1568
1569         list_for_each_entry_safe(entry, next, head, list)
1570                 kfree(entry);
1571 }
1572
1573 static struct iommu_ops arm_smmu_ops = {
1574         .capable                = arm_smmu_capable,
1575         .domain_alloc           = arm_smmu_domain_alloc,
1576         .domain_free            = arm_smmu_domain_free,
1577         .attach_dev             = arm_smmu_attach_dev,
1578         .map                    = arm_smmu_map,
1579         .unmap                  = arm_smmu_unmap,
1580         .map_sg                 = default_iommu_map_sg,
1581         .iova_to_phys           = arm_smmu_iova_to_phys,
1582         .add_device             = arm_smmu_add_device,
1583         .remove_device          = arm_smmu_remove_device,
1584         .device_group           = arm_smmu_device_group,
1585         .domain_get_attr        = arm_smmu_domain_get_attr,
1586         .domain_set_attr        = arm_smmu_domain_set_attr,
1587         .of_xlate               = arm_smmu_of_xlate,
1588         .get_resv_regions       = arm_smmu_get_resv_regions,
1589         .put_resv_regions       = arm_smmu_put_resv_regions,
1590         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1591 };
1592
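/*
 * Put the SMMU into a known-good state: clear the recorded global faults,
 * reset every stream mapping entry and context bank, apply the MMU-500
 * errata workarounds, invalidate the TLBs and finally enable the SMMU with
 * fault reporting via sCR0.
 */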
1593 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1594 {
1595         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1596         int i;
1597         u32 reg, major;
1598
1599         /* clear global FSR */
1600         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1601         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1602
1603         /*
1604          * Reset stream mapping groups: Initial values mark all SMRn as
1605          * invalid and all S2CRn as bypass unless overridden.
1606          */
1607         for (i = 0; i < smmu->num_mapping_groups; ++i)
1608                 arm_smmu_write_sme(smmu, i);
1609
1610         if (smmu->model == ARM_MMU500) {
1611                 /*
1612                  * Before clearing ARM_MMU500_ACTLR_CPRE, the CACHE_LOCK bit
1613                  * of ACR must be cleared first. Note that CACHE_LOCK is only
1614                  * present in MMU-500r2 onwards.
1615                  */
1616                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1617                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1618                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1619                 if (major >= 2)
1620                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1621                 /*
1622                  * Allow unmatched Stream IDs to allocate bypass
1623                  * TLB entries for reduced latency.
1624                  */
1625                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
1626                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1627         }
1628
1629         /* Make sure all context banks are disabled and clear CB_FSR  */
1630         for (i = 0; i < smmu->num_context_banks; ++i) {
1631                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1632
1633                 arm_smmu_write_context_bank(smmu, i);
1634                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1635                 /*
1636                  * Disable MMU-500's not-particularly-beneficial next-page
1637                  * prefetcher for the sake of errata #841119 and #826419.
1638                  */
1639                 if (smmu->model == ARM_MMU500) {
1640                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1641                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1642                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1643                 }
1644         }
1645
1646         /* Invalidate the TLB, just in case */
1647         writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1648         writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1649
1650         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1651
1652         /* Enable fault reporting */
1653         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1654
1655         /* Disable TLB broadcasting. */
1656         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1657
1658         /* Enable client access, handling unmatched streams as appropriate */
1659         reg &= ~sCR0_CLIENTPD;
1660         if (disable_bypass)
1661                 reg |= sCR0_USFCFG;
1662         else
1663                 reg &= ~sCR0_USFCFG;
1664
1665         /* Disable forced broadcasting */
1666         reg &= ~sCR0_FB;
1667
1668         /* Don't upgrade barriers */
1669         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1670
1671         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1672                 reg |= sCR0_VMID16EN;
1673
1674         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1675                 reg |= sCR0_EXIDENABLE;
1676
1677         /* Push the button */
1678         arm_smmu_tlb_sync_global(smmu);
1679         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1680 }
1681
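/*
 * Decode the 3-bit address-size fields of the ID registers (IAS/OAS/UBS)
 * into a width in bits.
 */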
1682 static int arm_smmu_id_size_to_bits(int size)
1683 {
1684         switch (size) {
1685         case 0:
1686                 return 32;
1687         case 1:
1688                 return 36;
1689         case 2:
1690                 return 40;
1691         case 3:
1692                 return 42;
1693         case 4:
1694                 return 44;
1695         case 5:
1696         default:
1697                 return 48;
1698         }
1699 }
1700
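/*
 * Read the global ID registers to discover what this implementation
 * supports: translation stages, stream matching resources, context banks,
 * address sizes and page table formats.
 */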
1701 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1702 {
1703         unsigned long size;
1704         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1705         u32 id;
1706         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1707         int i;
1708
1709         dev_notice(smmu->dev, "probing hardware configuration...\n");
1710         dev_notice(smmu->dev, "SMMUv%d with:\n",
1711                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1712
1713         /* ID0 */
1714         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1715
1716         /* Restrict available stages based on module parameter */
1717         if (force_stage == 1)
1718                 id &= ~(ID0_S2TS | ID0_NTS);
1719         else if (force_stage == 2)
1720                 id &= ~(ID0_S1TS | ID0_NTS);
1721
1722         if (id & ID0_S1TS) {
1723                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1724                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1725         }
1726
1727         if (id & ID0_S2TS) {
1728                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1729                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1730         }
1731
1732         if (id & ID0_NTS) {
1733                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1734                 dev_notice(smmu->dev, "\tnested translation\n");
1735         }
1736
1737         if (!(smmu->features &
1738                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1739                 dev_err(smmu->dev, "\tno translation support!\n");
1740                 return -ENODEV;
1741         }
1742
1743         if ((id & ID0_S1TS) &&
1744                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1745                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1746                 dev_notice(smmu->dev, "\taddress translation ops\n");
1747         }
1748
1749         /*
1750          * In order for DMA API calls to work properly, we must defer to what
1751          * the FW says about coherency, regardless of what the hardware claims.
1752          * Fortunately, this also opens up a workaround for systems where the
1753          * ID register value has ended up configured incorrectly.
1754          */
1755         cttw_reg = !!(id & ID0_CTTW);
1756         if (cttw_fw || cttw_reg)
1757                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1758                            cttw_fw ? "" : "non-");
1759         if (cttw_fw != cttw_reg)
1760                 dev_notice(smmu->dev,
1761                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1762
1763         /* Max. number of entries we have for stream matching/indexing */
1764         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1765                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1766                 size = 1 << 16;
1767         } else {
1768                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1769         }
1770         smmu->streamid_mask = size - 1;
1771         if (id & ID0_SMS) {
1772                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1773                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1774                 if (size == 0) {
1775                         dev_err(smmu->dev,
1776                                 "stream-matching supported, but no SMRs present!\n");
1777                         return -ENODEV;
1778                 }
1779
1780                 /* Zero-initialised to mark as invalid */
1781                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1782                                           GFP_KERNEL);
1783                 if (!smmu->smrs)
1784                         return -ENOMEM;
1785
1786                 dev_notice(smmu->dev,
1787                            "\tstream matching with %lu register groups\n", size);
1788         }
1789         /* s2cr->type == 0 means translation, so initialise explicitly */
1790         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1791                                          GFP_KERNEL);
1792         if (!smmu->s2crs)
1793                 return -ENOMEM;
1794         for (i = 0; i < size; i++)
1795                 smmu->s2crs[i] = s2cr_init_val;
1796
1797         smmu->num_mapping_groups = size;
1798         mutex_init(&smmu->stream_map_mutex);
1799         spin_lock_init(&smmu->global_sync_lock);
1800
1801         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1802                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1803                 if (!(id & ID0_PTFS_NO_AARCH32S))
1804                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1805         }
1806
1807         /* ID1 */
1808         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1809         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1810
1811         /* Check for size mismatch of SMMU address space from mapped region */
1812         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1813         size <<= smmu->pgshift;
1814         if (smmu->cb_base != gr0_base + size)
1815                 dev_warn(smmu->dev,
1816                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1817                         size * 2, (smmu->cb_base - gr0_base) * 2);
1818
1819         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1820         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1821         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1822                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1823                 return -ENODEV;
1824         }
1825         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1826                    smmu->num_context_banks, smmu->num_s2_context_banks);
1827         /*
1828          * Cavium CN88xx erratum #27704.
1829          * Ensure ASID and VMID allocation is unique across all SMMUs in
1830          * the system.
1831          */
1832         if (smmu->model == CAVIUM_SMMUV2) {
1833                 smmu->cavium_id_base =
1834                         atomic_add_return(smmu->num_context_banks,
1835                                           &cavium_smmu_context_count);
1836                 smmu->cavium_id_base -= smmu->num_context_banks;
1837                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1838         }
1839         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1840                                  sizeof(*smmu->cbs), GFP_KERNEL);
1841         if (!smmu->cbs)
1842                 return -ENOMEM;
1843
1844         /* ID2 */
1845         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1846         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1847         smmu->ipa_size = size;
1848
1849         /* The output mask is also applied for bypass */
1850         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1851         smmu->pa_size = size;
1852
1853         if (id & ID2_VMID16)
1854                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1855
1856         /*
1857          * What the page table walker can address actually depends on which
1858          * descriptor format is in use, but since a) we don't know that yet,
1859          * and b) it can vary per context bank, this will have to do...
1860          */
1861         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1862                 dev_warn(smmu->dev,
1863                          "failed to set DMA mask for table walker\n");
1864
1865         if (smmu->version < ARM_SMMU_V2) {
1866                 smmu->va_size = smmu->ipa_size;
1867                 if (smmu->version == ARM_SMMU_V1_64K)
1868                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1869         } else {
1870                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1871                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1872                 if (id & ID2_PTFS_4K)
1873                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1874                 if (id & ID2_PTFS_16K)
1875                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1876                 if (id & ID2_PTFS_64K)
1877                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1878         }
1879
1880         /* Now we've corralled the various formats, what'll it do? */
1881         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1882                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1883         if (smmu->features &
1884             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1885                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1886         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1887                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1888         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1889                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1890
1891         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1892                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1893         else
1894                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1895         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1896                    smmu->pgsize_bitmap);
1897
1898
1899         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1900                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1901                            smmu->va_size, smmu->ipa_size);
1902
1903         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1904                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1905                            smmu->ipa_size, smmu->pa_size);
1906
1907         return 0;
1908 }
1909
1910 struct arm_smmu_match_data {
1911         enum arm_smmu_arch_version version;
1912         enum arm_smmu_implementation model;
1913 };
1914
1915 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1916 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1917
1918 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1919 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1920 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1921 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1922 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1923 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1924
1925 static const struct of_device_id arm_smmu_of_match[] = {
1926         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1927         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1928         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1929         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1930         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1931         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1932         { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1933         { },
1934 };
1935 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1936
1937 #ifdef CONFIG_ACPI
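/* Map the IORT model field onto our internal version/implementation enums. */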
1938 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1939 {
1940         int ret = 0;
1941
1942         switch (model) {
1943         case ACPI_IORT_SMMU_V1:
1944         case ACPI_IORT_SMMU_CORELINK_MMU400:
1945                 smmu->version = ARM_SMMU_V1;
1946                 smmu->model = GENERIC_SMMU;
1947                 break;
1948         case ACPI_IORT_SMMU_CORELINK_MMU401:
1949                 smmu->version = ARM_SMMU_V1_64K;
1950                 smmu->model = GENERIC_SMMU;
1951                 break;
1952         case ACPI_IORT_SMMU_V2:
1953                 smmu->version = ARM_SMMU_V2;
1954                 smmu->model = GENERIC_SMMU;
1955                 break;
1956         case ACPI_IORT_SMMU_CORELINK_MMU500:
1957                 smmu->version = ARM_SMMU_V2;
1958                 smmu->model = ARM_MMU500;
1959                 break;
1960         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1961                 smmu->version = ARM_SMMU_V2;
1962                 smmu->model = CAVIUM_SMMUV2;
1963                 break;
1964         default:
1965                 ret = -ENODEV;
1966         }
1967
1968         return ret;
1969 }
1970
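/*
 * ACPI probe path: pull the SMMU model and coherent-walk flag out of the
 * IORT node that the ACPI core attached as platform data.
 */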
1971 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1972                                       struct arm_smmu_device *smmu)
1973 {
1974         struct device *dev = smmu->dev;
1975         struct acpi_iort_node *node =
1976                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1977         struct acpi_iort_smmu *iort_smmu;
1978         int ret;
1979
1980         /* Retrieve SMMU1/2 specific data */
1981         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1982
1983         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1984         if (ret < 0)
1985                 return ret;
1986
1987         /* Ignore the configuration access interrupt */
1988         smmu->num_global_irqs = 1;
1989
1990         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1991                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1992
1993         return 0;
1994 }
1995 #else
1996 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1997                                              struct arm_smmu_device *smmu)
1998 {
1999         return -ENODEV;
2000 }
2001 #endif
2002
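/*
 * DT probe path: read the mandatory "#global-interrupts" count, take the
 * version/implementation from the match data, and work out whether the
 * deprecated per-master "mmu-masters" binding is being used.
 */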
2003 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2004                                     struct arm_smmu_device *smmu)
2005 {
2006         const struct arm_smmu_match_data *data;
2007         struct device *dev = &pdev->dev;
2008         bool legacy_binding;
2009
2010         if (of_property_read_u32(dev->of_node, "#global-interrupts",
2011                                  &smmu->num_global_irqs)) {
2012                 dev_err(dev, "missing #global-interrupts property\n");
2013                 return -ENODEV;
2014         }
2015
2016         data = of_device_get_match_data(dev);
2017         smmu->version = data->version;
2018         smmu->model = data->model;
2019
2020         parse_driver_options(smmu);
2021
2022         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2023         if (legacy_binding && !using_generic_binding) {
2024                 if (!using_legacy_binding)
2025                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2026                 using_legacy_binding = true;
2027         } else if (!legacy_binding && !using_legacy_binding) {
2028                 using_generic_binding = true;
2029         } else {
2030                 dev_err(dev, "not probing due to mismatched DT properties\n");
2031                 return -ENODEV;
2032         }
2033
2034         if (of_dma_is_coherent(dev->of_node))
2035                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2036
2037         return 0;
2038 }
2039
2040 static void arm_smmu_bus_init(void)
2041 {
2042         /* Oh, for a proper bus abstraction */
2043         if (!iommu_present(&platform_bus_type))
2044                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2045 #ifdef CONFIG_ARM_AMBA
2046         if (!iommu_present(&amba_bustype))
2047                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2048 #endif
2049 #ifdef CONFIG_PCI
2050         if (!iommu_present(&pci_bus_type)) {
2051                 pci_request_acs();
2052                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2053         }
2054 #endif
2055 }
2056
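/*
 * Common probe: map the register space, count and request the interrupts,
 * discover the hardware configuration, register with the IOMMU core and
 * reset the SMMU ready for use.
 */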
2057 static int arm_smmu_device_probe(struct platform_device *pdev)
2058 {
2059         struct resource *res;
2060         resource_size_t ioaddr;
2061         struct arm_smmu_device *smmu;
2062         struct device *dev = &pdev->dev;
2063         int num_irqs, i, err;
2064
2065         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2066         if (!smmu) {
2067                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2068                 return -ENOMEM;
2069         }
2070         smmu->dev = dev;
2071
2072         if (dev->of_node)
2073                 err = arm_smmu_device_dt_probe(pdev, smmu);
2074         else
2075                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2076
2077         if (err)
2078                 return err;
2079
2080         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2081         ioaddr = res->start;
2082         smmu->base = devm_ioremap_resource(dev, res);
2083         if (IS_ERR(smmu->base))
2084                 return PTR_ERR(smmu->base);
2085         smmu->cb_base = smmu->base + resource_size(res) / 2;
2086
2087         num_irqs = 0;
2088         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2089                 num_irqs++;
2090                 if (num_irqs > smmu->num_global_irqs)
2091                         smmu->num_context_irqs++;
2092         }
2093
2094         if (!smmu->num_context_irqs) {
2095                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2096                         num_irqs, smmu->num_global_irqs + 1);
2097                 return -ENODEV;
2098         }
2099
2100         smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
2101                                   GFP_KERNEL);
2102         if (!smmu->irqs) {
2103                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2104                 return -ENOMEM;
2105         }
2106
2107         for (i = 0; i < num_irqs; ++i) {
2108                 int irq = platform_get_irq(pdev, i);
2109
2110                 if (irq < 0) {
2111                         dev_err(dev, "failed to get irq index %d\n", i);
2112                         return -ENODEV;
2113                 }
2114                 smmu->irqs[i] = irq;
2115         }
2116
2117         err = arm_smmu_device_cfg_probe(smmu);
2118         if (err)
2119                 return err;
2120
2121         if (smmu->version == ARM_SMMU_V2) {
2122                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2123                         dev_err(dev,
2124                               "found only %d context irq(s) but %d required\n",
2125                               smmu->num_context_irqs, smmu->num_context_banks);
2126                         return -ENODEV;
2127                 }
2128
2129                 /* Ignore superfluous interrupts */
2130                 smmu->num_context_irqs = smmu->num_context_banks;
2131         }
2132
2133         for (i = 0; i < smmu->num_global_irqs; ++i) {
2134                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2135                                        arm_smmu_global_fault,
2136                                        IRQF_SHARED,
2137                                        "arm-smmu global fault",
2138                                        smmu);
2139                 if (err) {
2140                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2141                                 i, smmu->irqs[i]);
2142                         return err;
2143                 }
2144         }
2145
2146         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2147                                      "smmu.%pa", &ioaddr);
2148         if (err) {
2149                 dev_err(dev, "Failed to register iommu in sysfs\n");
2150                 return err;
2151         }
2152
2153         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2154         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2155
2156         err = iommu_device_register(&smmu->iommu);
2157         if (err) {
2158                 dev_err(dev, "Failed to register iommu\n");
2159                 return err;
2160         }
2161
2162         platform_set_drvdata(pdev, smmu);
2163         arm_smmu_device_reset(smmu);
2164         arm_smmu_test_smr_masks(smmu);
2165
2166         /*
2167          * For ACPI and generic DT bindings, an SMMU will be probed before
2168          * any device which might need it, so we want the bus ops in place
2169          * ready to handle default domain setup as soon as any SMMU exists.
2170          */
2171         if (!using_legacy_binding)
2172                 arm_smmu_bus_init();
2173
2174         return 0;
2175 }
2176
2177 /*
2178  * With the legacy DT binding in play, though, we have no guarantees about
2179  * probe order, but then we're also not doing default domains, so we can
2180  * delay setting bus ops until we're sure every possible SMMU is ready,
2181  * and that way ensure that no add_device() calls get missed.
2182  */
2183 static int arm_smmu_legacy_bus_init(void)
2184 {
2185         if (using_legacy_binding)
2186                 arm_smmu_bus_init();
2187         return 0;
2188 }
2189 device_initcall_sync(arm_smmu_legacy_bus_init);
2190
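/*
 * On remove/shutdown, warn if any domains are still live and then disable
 * translation by setting sCR0.CLIENTPD.
 */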
2191 static int arm_smmu_device_remove(struct platform_device *pdev)
2192 {
2193         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2194
2195         if (!smmu)
2196                 return -ENODEV;
2197
2198         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2199                 dev_err(&pdev->dev, "removing device with active domains!\n");
2200
2201         /* Turn the thing off */
2202         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2203         return 0;
2204 }
2205
2206 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2207 {
2208         arm_smmu_device_remove(pdev);
2209 }
2210
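/* On resume, simply re-run the hardware reset/initialisation sequence. */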
2211 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2212 {
2213         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2214
2215         arm_smmu_device_reset(smmu);
2216         return 0;
2217 }
2218
2219 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2220
2221 static struct platform_driver arm_smmu_driver = {
2222         .driver = {
2223                 .name           = "arm-smmu",
2224                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2225                 .pm             = &arm_smmu_pm_ops,
2226         },
2227         .probe  = arm_smmu_device_probe,
2228         .remove = arm_smmu_device_remove,
2229         .shutdown = arm_smmu_device_shutdown,
2230 };
2231 module_platform_driver(arm_smmu_driver);
2232
2233 IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
2234 IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
2235 IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
2236 IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
2237 IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
2238 IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
2239
2240 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2241 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2242 MODULE_LICENSE("GPL v2");