2 * IOMMU API for ARM architected SMMUv3 implementations.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright (C) 2015 ARM Limited
18 * Author: Will Deacon <will.deacon@arm.com>
20 * This driver is powered by bad coffee and bombay mix.
23 #include <linux/acpi.h>
24 #include <linux/acpi_iort.h>
25 #include <linux/bitfield.h>
26 #include <linux/bitops.h>
27 #include <linux/crash_dump.h>
28 #include <linux/delay.h>
29 #include <linux/dma-iommu.h>
30 #include <linux/err.h>
31 #include <linux/interrupt.h>
32 #include <linux/iommu.h>
33 #include <linux/iopoll.h>
34 #include <linux/module.h>
35 #include <linux/msi.h>
37 #include <linux/of_address.h>
38 #include <linux/of_iommu.h>
39 #include <linux/of_platform.h>
40 #include <linux/pci.h>
41 #include <linux/platform_device.h>
43 #include <linux/amba/bus.h>
45 #include "io-pgtable.h"
48 #define ARM_SMMU_IDR0 0x0
49 #define IDR0_ST_LVL GENMASK(28, 27)
50 #define IDR0_ST_LVL_2LVL 1
51 #define IDR0_STALL_MODEL GENMASK(25, 24)
52 #define IDR0_STALL_MODEL_STALL 0
53 #define IDR0_STALL_MODEL_FORCE 2
54 #define IDR0_TTENDIAN GENMASK(22, 21)
55 #define IDR0_TTENDIAN_MIXED 0
56 #define IDR0_TTENDIAN_LE 2
57 #define IDR0_TTENDIAN_BE 3
58 #define IDR0_CD2L (1 << 19)
59 #define IDR0_VMID16 (1 << 18)
60 #define IDR0_PRI (1 << 16)
61 #define IDR0_SEV (1 << 14)
62 #define IDR0_MSI (1 << 13)
63 #define IDR0_ASID16 (1 << 12)
64 #define IDR0_ATS (1 << 10)
65 #define IDR0_HYP (1 << 9)
66 #define IDR0_COHACC (1 << 4)
67 #define IDR0_TTF GENMASK(3, 2)
68 #define IDR0_TTF_AARCH64 2
69 #define IDR0_TTF_AARCH32_64 3
70 #define IDR0_S1P (1 << 1)
71 #define IDR0_S2P (1 << 0)
73 #define ARM_SMMU_IDR1 0x4
74 #define IDR1_TABLES_PRESET (1 << 30)
75 #define IDR1_QUEUES_PRESET (1 << 29)
76 #define IDR1_REL (1 << 28)
77 #define IDR1_CMDQS GENMASK(25, 21)
78 #define IDR1_EVTQS GENMASK(20, 16)
79 #define IDR1_PRIQS GENMASK(15, 11)
80 #define IDR1_SSIDSIZE GENMASK(10, 6)
81 #define IDR1_SIDSIZE GENMASK(5, 0)
83 #define ARM_SMMU_IDR5 0x14
84 #define IDR5_STALL_MAX GENMASK(31, 16)
85 #define IDR5_GRAN64K (1 << 6)
86 #define IDR5_GRAN16K (1 << 5)
87 #define IDR5_GRAN4K (1 << 4)
88 #define IDR5_OAS GENMASK(2, 0)
89 #define IDR5_OAS_32_BIT 0
90 #define IDR5_OAS_36_BIT 1
91 #define IDR5_OAS_40_BIT 2
92 #define IDR5_OAS_42_BIT 3
93 #define IDR5_OAS_44_BIT 4
94 #define IDR5_OAS_48_BIT 5
95 #define IDR5_OAS_52_BIT 6
96 #define IDR5_VAX GENMASK(11, 10)
97 #define IDR5_VAX_52_BIT 1
99 #define ARM_SMMU_CR0 0x20
100 #define CR0_CMDQEN (1 << 3)
101 #define CR0_EVTQEN (1 << 2)
102 #define CR0_PRIQEN (1 << 1)
103 #define CR0_SMMUEN (1 << 0)
105 #define ARM_SMMU_CR0ACK 0x24
107 #define ARM_SMMU_CR1 0x28
108 #define CR1_TABLE_SH GENMASK(11, 10)
109 #define CR1_TABLE_OC GENMASK(9, 8)
110 #define CR1_TABLE_IC GENMASK(7, 6)
111 #define CR1_QUEUE_SH GENMASK(5, 4)
112 #define CR1_QUEUE_OC GENMASK(3, 2)
113 #define CR1_QUEUE_IC GENMASK(1, 0)
114 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
115 #define CR1_CACHE_NC 0
116 #define CR1_CACHE_WB 1
117 #define CR1_CACHE_WT 2
119 #define ARM_SMMU_CR2 0x2c
120 #define CR2_PTM (1 << 2)
121 #define CR2_RECINVSID (1 << 1)
122 #define CR2_E2H (1 << 0)
124 #define ARM_SMMU_GBPA 0x44
125 #define GBPA_UPDATE (1 << 31)
126 #define GBPA_ABORT (1 << 20)
128 #define ARM_SMMU_IRQ_CTRL 0x50
129 #define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
130 #define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
131 #define IRQ_CTRL_GERROR_IRQEN (1 << 0)
133 #define ARM_SMMU_IRQ_CTRLACK 0x54
135 #define ARM_SMMU_GERROR 0x60
136 #define GERROR_SFM_ERR (1 << 8)
137 #define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
138 #define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
139 #define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
140 #define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
141 #define GERROR_PRIQ_ABT_ERR (1 << 3)
142 #define GERROR_EVTQ_ABT_ERR (1 << 2)
143 #define GERROR_CMDQ_ERR (1 << 0)
144 #define GERROR_ERR_MASK 0xfd
146 #define ARM_SMMU_GERRORN 0x64
148 #define ARM_SMMU_GERROR_IRQ_CFG0 0x68
149 #define ARM_SMMU_GERROR_IRQ_CFG1 0x70
150 #define ARM_SMMU_GERROR_IRQ_CFG2 0x74
152 #define ARM_SMMU_STRTAB_BASE 0x80
153 #define STRTAB_BASE_RA (1UL << 62)
154 #define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
156 #define ARM_SMMU_STRTAB_BASE_CFG 0x88
157 #define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
158 #define STRTAB_BASE_CFG_FMT_LINEAR 0
159 #define STRTAB_BASE_CFG_FMT_2LVL 1
160 #define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
161 #define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
163 #define ARM_SMMU_CMDQ_BASE 0x90
164 #define ARM_SMMU_CMDQ_PROD 0x98
165 #define ARM_SMMU_CMDQ_CONS 0x9c
167 #define ARM_SMMU_EVTQ_BASE 0xa0
168 #define ARM_SMMU_EVTQ_PROD 0x100a8
169 #define ARM_SMMU_EVTQ_CONS 0x100ac
170 #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
171 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
172 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
174 #define ARM_SMMU_PRIQ_BASE 0xc0
175 #define ARM_SMMU_PRIQ_PROD 0x100c8
176 #define ARM_SMMU_PRIQ_CONS 0x100cc
177 #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
178 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
179 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
181 /* Common MSI config fields */
182 #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
183 #define MSI_CFG2_SH GENMASK(5, 4)
184 #define MSI_CFG2_MEMATTR GENMASK(3, 0)
186 /* Common memory attribute values */
187 #define ARM_SMMU_SH_NSH 0
188 #define ARM_SMMU_SH_OSH 2
189 #define ARM_SMMU_SH_ISH 3
190 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
191 #define ARM_SMMU_MEMATTR_OIWB 0xf
193 #define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
194 #define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
195 #define Q_OVERFLOW_FLAG (1 << 31)
196 #define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
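/*
 * For example, with max_n_shift == 8 a prod/cons value of 0x1ff decodes as
 * index 0xff (bits [7:0]) with the wrap bit (bit 8) set. queue_full() and
 * queue_empty() below compare indices and wrap bits to tell a full queue
 * from an empty one, while Q_OVERFLOW_FLAG (bit 31) is used to spot
 * producer overflow on the event and PRI queues.
 */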
197 #define Q_ENT(q, p) ((q)->base + \
198 Q_IDX(q, p) * (q)->ent_dwords)
200 #define Q_BASE_RWA (1UL << 62)
201 #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
202 #define Q_BASE_LOG2SIZE GENMASK(4, 0)
207 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
208 * 2lvl: 128k L1 entries,
209 * 256 lazy entries per table (each table covers a PCI bus)
211 #define STRTAB_L1_SZ_SHIFT 20
212 #define STRTAB_SPLIT 8
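/*
 * With STRTAB_SPLIT == 8, sid >> 8 indexes the L1 table and the low 8 bits
 * index the lazily-allocated L2 table, so each L2 table holds 256 STEs
 * (one PCI bus worth of Requester IDs), i.e. 256 * STRTAB_STE_DWORDS * 8
 * = 16KiB per L2 table.
 */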
214 #define STRTAB_L1_DESC_DWORDS 1
215 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
216 #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
218 #define STRTAB_STE_DWORDS 8
219 #define STRTAB_STE_0_V (1UL << 0)
220 #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
221 #define STRTAB_STE_0_CFG_ABORT 0
222 #define STRTAB_STE_0_CFG_BYPASS 4
223 #define STRTAB_STE_0_CFG_S1_TRANS 5
224 #define STRTAB_STE_0_CFG_S2_TRANS 6
226 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
227 #define STRTAB_STE_0_S1FMT_LINEAR 0
228 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
229 #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
231 #define STRTAB_STE_1_S1C_CACHE_NC 0UL
232 #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
233 #define STRTAB_STE_1_S1C_CACHE_WT 2UL
234 #define STRTAB_STE_1_S1C_CACHE_WB 3UL
235 #define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
236 #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
237 #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
239 #define STRTAB_STE_1_S1STALLD (1UL << 27)
241 #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
242 #define STRTAB_STE_1_EATS_ABT 0UL
243 #define STRTAB_STE_1_EATS_TRANS 1UL
244 #define STRTAB_STE_1_EATS_S1CHK 2UL
246 #define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
247 #define STRTAB_STE_1_STRW_NSEL1 0UL
248 #define STRTAB_STE_1_STRW_EL2 2UL
250 #define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
251 #define STRTAB_STE_1_SHCFG_INCOMING 1UL
253 #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
254 #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
255 #define STRTAB_STE_2_S2AA64 (1UL << 51)
256 #define STRTAB_STE_2_S2ENDI (1UL << 52)
257 #define STRTAB_STE_2_S2PTW (1UL << 54)
258 #define STRTAB_STE_2_S2R (1UL << 58)
260 #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
262 /* Context descriptor (stage-1 only) */
263 #define CTXDESC_CD_DWORDS 8
264 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
265 #define ARM64_TCR_T0SZ GENMASK_ULL(5, 0)
266 #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
267 #define ARM64_TCR_TG0 GENMASK_ULL(15, 14)
268 #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
269 #define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8)
270 #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
271 #define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10)
272 #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
273 #define ARM64_TCR_SH0 GENMASK_ULL(13, 12)
274 #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
275 #define ARM64_TCR_EPD0 (1ULL << 7)
276 #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
277 #define ARM64_TCR_EPD1 (1ULL << 23)
279 #define CTXDESC_CD_0_ENDI (1UL << 15)
280 #define CTXDESC_CD_0_V (1UL << 31)
282 #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
283 #define ARM64_TCR_IPS GENMASK_ULL(34, 32)
284 #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
285 #define ARM64_TCR_TBI0 (1ULL << 37)
287 #define CTXDESC_CD_0_AA64 (1UL << 41)
288 #define CTXDESC_CD_0_S (1UL << 44)
289 #define CTXDESC_CD_0_R (1UL << 45)
290 #define CTXDESC_CD_0_A (1UL << 46)
291 #define CTXDESC_CD_0_ASET (1UL << 47)
292 #define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
294 #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
296 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
297 #define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
298 FIELD_GET(ARM64_TCR_##fld, tcr))
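/*
 * For example, ARM_SMMU_TCR2CD(tcr, TG0) extracts TG0 from bits [15:14] of
 * the CPU TCR value and re-packs it into bits [7:6] of CD word 0, as used
 * by arm_smmu_cpu_tcr_to_cd() below.
 */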
301 #define CMDQ_ENT_DWORDS 2
302 #define CMDQ_MAX_SZ_SHIFT 8
304 #define CMDQ_CONS_ERR GENMASK(30, 24)
305 #define CMDQ_ERR_CERROR_NONE_IDX 0
306 #define CMDQ_ERR_CERROR_ILL_IDX 1
307 #define CMDQ_ERR_CERROR_ABT_IDX 2
309 #define CMDQ_0_OP GENMASK_ULL(7, 0)
310 #define CMDQ_0_SSV (1UL << 11)
312 #define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
313 #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
314 #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
316 #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
317 #define CMDQ_CFGI_1_LEAF (1UL << 0)
318 #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
320 #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
321 #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
322 #define CMDQ_TLBI_1_LEAF (1UL << 0)
323 #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
324 #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
326 #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
327 #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
328 #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
329 #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
331 #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
332 #define CMDQ_SYNC_0_CS_NONE 0
333 #define CMDQ_SYNC_0_CS_IRQ 1
334 #define CMDQ_SYNC_0_CS_SEV 2
335 #define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
336 #define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
337 #define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
338 #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
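/*
 * CMD_SYNC completion is signalled either by an MSI-style write of a
 * sequence number to smmu->sync_count (CS == IRQ) or by polling the queue
 * with WFE/SEV (CS == SEV); arm_smmu_cmdq_issue_sync() picks between the
 * two based on the MSI and COHERENCY features.
 */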
341 #define EVTQ_ENT_DWORDS 4
342 #define EVTQ_MAX_SZ_SHIFT 7
344 #define EVTQ_0_ID GENMASK_ULL(7, 0)
347 #define PRIQ_ENT_DWORDS 2
348 #define PRIQ_MAX_SZ_SHIFT 8
350 #define PRIQ_0_SID GENMASK_ULL(31, 0)
351 #define PRIQ_0_SSID GENMASK_ULL(51, 32)
352 #define PRIQ_0_PERM_PRIV (1UL << 58)
353 #define PRIQ_0_PERM_EXEC (1UL << 59)
354 #define PRIQ_0_PERM_READ (1UL << 60)
355 #define PRIQ_0_PERM_WRITE (1UL << 61)
356 #define PRIQ_0_PRG_LAST (1UL << 62)
357 #define PRIQ_0_SSID_V (1UL << 63)
359 #define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
360 #define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
362 /* High-level queue structures */
363 #define ARM_SMMU_POLL_TIMEOUT_US 100
364 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
365 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
367 #define MSI_IOVA_BASE 0x8000000
368 #define MSI_IOVA_LENGTH 0x100000
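/*
 * This 1MB IOVA window is advertised to the IOMMU core as a software MSI
 * region (IOMMU_RESV_SW_MSI) in arm_smmu_get_resv_regions(), giving the
 * MSI layer somewhere safe to map doorbells for devices behind the SMMU.
 */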
370 static bool disable_bypass = 1;
371 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
372 MODULE_PARM_DESC(disable_bypass,
373 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
381 enum arm_smmu_msi_index {
388 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
390 ARM_SMMU_EVTQ_IRQ_CFG0,
391 ARM_SMMU_EVTQ_IRQ_CFG1,
392 ARM_SMMU_EVTQ_IRQ_CFG2,
394 [GERROR_MSI_INDEX] = {
395 ARM_SMMU_GERROR_IRQ_CFG0,
396 ARM_SMMU_GERROR_IRQ_CFG1,
397 ARM_SMMU_GERROR_IRQ_CFG2,
400 ARM_SMMU_PRIQ_IRQ_CFG0,
401 ARM_SMMU_PRIQ_IRQ_CFG1,
402 ARM_SMMU_PRIQ_IRQ_CFG2,
406 struct arm_smmu_cmdq_ent {
409 bool substream_valid;
411 /* Command-specific fields */
413 #define CMDQ_OP_PREFETCH_CFG 0x1
420 #define CMDQ_OP_CFGI_STE 0x3
421 #define CMDQ_OP_CFGI_ALL 0x4
430 #define CMDQ_OP_TLBI_NH_ASID 0x11
431 #define CMDQ_OP_TLBI_NH_VA 0x12
432 #define CMDQ_OP_TLBI_EL2_ALL 0x20
433 #define CMDQ_OP_TLBI_S12_VMALL 0x28
434 #define CMDQ_OP_TLBI_S2_IPA 0x2a
435 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
443 #define CMDQ_OP_PRI_RESP 0x41
451 #define CMDQ_OP_CMD_SYNC 0x46
459 struct arm_smmu_queue {
460 int irq; /* Wired interrupt */
471 u32 __iomem *prod_reg;
472 u32 __iomem *cons_reg;
475 struct arm_smmu_cmdq {
476 struct arm_smmu_queue q;
480 struct arm_smmu_evtq {
481 struct arm_smmu_queue q;
485 struct arm_smmu_priq {
486 struct arm_smmu_queue q;
489 /* High-level stream table and context descriptor structures */
490 struct arm_smmu_strtab_l1_desc {
494 dma_addr_t l2ptr_dma;
497 struct arm_smmu_s1_cfg {
499 dma_addr_t cdptr_dma;
501 struct arm_smmu_ctx_desc {
509 struct arm_smmu_s2_cfg {
515 struct arm_smmu_strtab_ent {
517 * An STE is "assigned" if the master emitting the corresponding SID
518 * is attached to a domain. The behaviour of an unassigned STE is
519 * determined by the disable_bypass parameter, whereas an assigned
520 * STE behaves according to s1_cfg/s2_cfg, which themselves are
521 * configured according to the domain type.
524 struct arm_smmu_s1_cfg *s1_cfg;
525 struct arm_smmu_s2_cfg *s2_cfg;
528 struct arm_smmu_strtab_cfg {
530 dma_addr_t strtab_dma;
531 struct arm_smmu_strtab_l1_desc *l1_desc;
532 unsigned int num_l1_ents;
538 /* An SMMUv3 instance */
539 struct arm_smmu_device {
543 #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
544 #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
545 #define ARM_SMMU_FEAT_TT_LE (1 << 2)
546 #define ARM_SMMU_FEAT_TT_BE (1 << 3)
547 #define ARM_SMMU_FEAT_PRI (1 << 4)
548 #define ARM_SMMU_FEAT_ATS (1 << 5)
549 #define ARM_SMMU_FEAT_SEV (1 << 6)
550 #define ARM_SMMU_FEAT_MSI (1 << 7)
551 #define ARM_SMMU_FEAT_COHERENCY (1 << 8)
552 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
553 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
554 #define ARM_SMMU_FEAT_STALLS (1 << 11)
555 #define ARM_SMMU_FEAT_HYP (1 << 12)
556 #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
557 #define ARM_SMMU_FEAT_VAX (1 << 14)
560 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
561 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
564 struct arm_smmu_cmdq cmdq;
565 struct arm_smmu_evtq evtq;
566 struct arm_smmu_priq priq;
572 unsigned long ias; /* IPA */
573 unsigned long oas; /* PA */
574 unsigned long pgsize_bitmap;
576 #define ARM_SMMU_MAX_ASIDS (1 << 16)
577 unsigned int asid_bits;
578 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
580 #define ARM_SMMU_MAX_VMIDS (1 << 16)
581 unsigned int vmid_bits;
582 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
584 unsigned int ssid_bits;
585 unsigned int sid_bits;
587 struct arm_smmu_strtab_cfg strtab_cfg;
589 /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
595 /* IOMMU core code handle */
596 struct iommu_device iommu;
599 /* SMMU private data for each master */
600 struct arm_smmu_master_data {
601 struct arm_smmu_device *smmu;
602 struct arm_smmu_strtab_ent ste;
605 /* SMMU private data for an IOMMU domain */
606 enum arm_smmu_domain_stage {
607 ARM_SMMU_DOMAIN_S1 = 0,
609 ARM_SMMU_DOMAIN_NESTED,
610 ARM_SMMU_DOMAIN_BYPASS,
613 struct arm_smmu_domain {
614 struct arm_smmu_device *smmu;
615 struct mutex init_mutex; /* Protects smmu pointer */
617 struct io_pgtable_ops *pgtbl_ops;
619 enum arm_smmu_domain_stage stage;
621 struct arm_smmu_s1_cfg s1_cfg;
622 struct arm_smmu_s2_cfg s2_cfg;
625 struct iommu_domain domain;
628 struct arm_smmu_option_prop {
633 static struct arm_smmu_option_prop arm_smmu_options[] = {
634 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
635 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
639 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
640 struct arm_smmu_device *smmu)
642 if ((offset > SZ_64K) &&
643 (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
646 return smmu->base + offset;
649 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
651 return container_of(dom, struct arm_smmu_domain, domain);
654 static void parse_driver_options(struct arm_smmu_device *smmu)
659 if (of_property_read_bool(smmu->dev->of_node,
660 arm_smmu_options[i].prop)) {
661 smmu->options |= arm_smmu_options[i].opt;
662 dev_notice(smmu->dev, "option %s\n",
663 arm_smmu_options[i].prop);
665 } while (arm_smmu_options[++i].opt);
668 /* Low-level queue manipulation functions */
669 static bool queue_full(struct arm_smmu_queue *q)
671 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
672 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
675 static bool queue_empty(struct arm_smmu_queue *q)
677 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
678 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
681 static void queue_sync_cons(struct arm_smmu_queue *q)
683 q->cons = readl_relaxed(q->cons_reg);
686 static void queue_inc_cons(struct arm_smmu_queue *q)
688 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
690 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
693 * Ensure that all CPU accesses (reads and writes) to the queue
694 * are complete before we update the cons pointer.
697 writel_relaxed(q->cons, q->cons_reg);
700 static int queue_sync_prod(struct arm_smmu_queue *q)
703 u32 prod = readl_relaxed(q->prod_reg);
705 if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
712 static void queue_inc_prod(struct arm_smmu_queue *q)
714 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
716 q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
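	/*
	 * Use writel() rather than writel_relaxed() so that the command data
	 * written by queue_write() is observable to the SMMU before the
	 * producer index is advanced.
	 */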
717 writel(q->prod, q->prod_reg);
721 * Wait for the SMMU to consume items. If sync is true, wait until the queue
722 * is empty. Otherwise, wait until there is at least one free slot.
724 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
727 unsigned int delay = 1, spin_cnt = 0;
729 /* Wait longer if it's a CMD_SYNC */
730 timeout = ktime_add_us(ktime_get(), sync ?
731 ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
732 ARM_SMMU_POLL_TIMEOUT_US);
734 while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
735 if (ktime_compare(ktime_get(), timeout) > 0)
740 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
753 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
757 for (i = 0; i < n_dwords; ++i)
758 *dst++ = cpu_to_le64(*src++);
761 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
766 queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
771 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
775 for (i = 0; i < n_dwords; ++i)
776 *dst++ = le64_to_cpu(*src++);
779 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
784 queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
789 /* High-level queue accessors */
790 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
792 memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
793 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
795 switch (ent->opcode) {
796 case CMDQ_OP_TLBI_EL2_ALL:
797 case CMDQ_OP_TLBI_NSNH_ALL:
799 case CMDQ_OP_PREFETCH_CFG:
800 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
801 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
802 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
804 case CMDQ_OP_CFGI_STE:
805 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
806 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
808 case CMDQ_OP_CFGI_ALL:
809 /* Cover the entire SID range */
810 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
812 case CMDQ_OP_TLBI_NH_VA:
813 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
814 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
815 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
816 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
818 case CMDQ_OP_TLBI_S2_IPA:
819 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
820 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
821 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
823 case CMDQ_OP_TLBI_NH_ASID:
824 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
826 case CMDQ_OP_TLBI_S12_VMALL:
827 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
829 case CMDQ_OP_PRI_RESP:
830 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
831 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
832 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
833 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
834 switch (ent->pri.resp) {
842 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
844 case CMDQ_OP_CMD_SYNC:
845 if (ent->sync.msiaddr)
846 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
848 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
849 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
850 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
852 * Commands are written little-endian, but we want the SMMU to
853 * receive MSIData, and thus write it back to memory, in CPU
854 * byte order, so big-endian needs an extra byteswap here.
856 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
857 cpu_to_le32(ent->sync.msidata));
858 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
867 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
869 static const char *cerror_str[] = {
870 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
871 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
872 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
876 u64 cmd[CMDQ_ENT_DWORDS];
877 struct arm_smmu_queue *q = &smmu->cmdq.q;
878 u32 cons = readl_relaxed(q->cons_reg);
879 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
880 struct arm_smmu_cmdq_ent cmd_sync = {
881 .opcode = CMDQ_OP_CMD_SYNC,
884 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
885 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
888 case CMDQ_ERR_CERROR_ABT_IDX:
889 dev_err(smmu->dev, "retrying command fetch\n");
890 case CMDQ_ERR_CERROR_NONE_IDX:
892 case CMDQ_ERR_CERROR_ILL_IDX:
899 * We may have concurrent producers, so we need to be careful
900 * not to touch any of the shadow cmdq state.
902 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
903 dev_err(smmu->dev, "skipping command in error state:\n");
904 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
905 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
907 /* Convert the erroneous command into a CMD_SYNC */
908 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
909 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
913 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
916 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
918 struct arm_smmu_queue *q = &smmu->cmdq.q;
919 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
921 while (queue_insert_raw(q, cmd) == -ENOSPC) {
922 if (queue_poll_cons(q, false, wfe))
923 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
927 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
928 struct arm_smmu_cmdq_ent *ent)
930 u64 cmd[CMDQ_ENT_DWORDS];
933 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
934 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
939 spin_lock_irqsave(&smmu->cmdq.lock, flags);
940 arm_smmu_cmdq_insert_cmd(smmu, cmd);
941 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
945 * The difference between val and sync_idx is bounded by the maximum size of
946 * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
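 * For example, if sync_idx has wrapped to 0xfffffffe and val reads back as 2,
 * (int)(val - sync_idx) evaluates to 4 >= 0, so completion is still detected
 * correctly across the wrap.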
948 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
953 timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
954 val = smp_cond_load_acquire(&smmu->sync_count,
955 (int)(VAL - sync_idx) >= 0 ||
956 !ktime_before(ktime_get(), timeout));
958 return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
961 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
963 u64 cmd[CMDQ_ENT_DWORDS];
965 struct arm_smmu_cmdq_ent ent = {
966 .opcode = CMDQ_OP_CMD_SYNC,
968 .msiaddr = virt_to_phys(&smmu->sync_count),
972 spin_lock_irqsave(&smmu->cmdq.lock, flags);
973 ent.sync.msidata = ++smmu->sync_nr;
974 arm_smmu_cmdq_build_cmd(cmd, &ent);
975 arm_smmu_cmdq_insert_cmd(smmu, cmd);
976 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
978 return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
981 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
983 u64 cmd[CMDQ_ENT_DWORDS];
985 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
986 struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
989 arm_smmu_cmdq_build_cmd(cmd, &ent);
991 spin_lock_irqsave(&smmu->cmdq.lock, flags);
992 arm_smmu_cmdq_insert_cmd(smmu, cmd);
993 ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
994 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
999 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1002 bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
1003 (smmu->features & ARM_SMMU_FEAT_COHERENCY);
1005 ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
1006 : __arm_smmu_cmdq_issue_sync(smmu);
1008 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
1011 /* Context descriptor manipulation functions */
1012 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
1016 /* Repack the TCR. Just care about TTBR0 for now */
1017 val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1018 val |= ARM_SMMU_TCR2CD(tcr, TG0);
1019 val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1020 val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1021 val |= ARM_SMMU_TCR2CD(tcr, SH0);
1022 val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1023 val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1024 val |= ARM_SMMU_TCR2CD(tcr, IPS);
1025 val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1030 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1031 struct arm_smmu_s1_cfg *cfg)
1036 * We don't need to issue any invalidation here, as we'll invalidate
1037 * the STE when installing the new entry anyway.
1039 val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1043 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1044 CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1047 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1048 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1049 val |= CTXDESC_CD_0_S;
1051 cfg->cdptr[0] = cpu_to_le64(val);
1053 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1054 cfg->cdptr[1] = cpu_to_le64(val);
1056 cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1059 /* Stream table manipulation functions */
1061 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1065 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1066 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1068 *dst = cpu_to_le64(val);
1071 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1073 struct arm_smmu_cmdq_ent cmd = {
1074 .opcode = CMDQ_OP_CFGI_STE,
1081 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1082 arm_smmu_cmdq_issue_sync(smmu);
1085 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1086 __le64 *dst, struct arm_smmu_strtab_ent *ste)
1089 * This is hideously complicated, but we only really care about
1090 * three cases at the moment:
1092 * 1. Invalid (all zero) -> bypass/fault (init)
1093 * 2. Bypass/fault -> translation/bypass (attach)
1094 * 3. Translation/bypass -> bypass/fault (detach)
1096 * Given that we can't update the STE atomically and the SMMU
1097 * doesn't read the thing in a defined order, that leaves us
1098 * with the following maintenance requirements:
1100 * 1. Update Config, return (init time STEs aren't live)
1101 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1102 * 3. Update Config, sync
1104 u64 val = le64_to_cpu(dst[0]);
1105 bool ste_live = false;
1106 struct arm_smmu_cmdq_ent prefetch_cmd = {
1107 .opcode = CMDQ_OP_PREFETCH_CFG,
1113 if (val & STRTAB_STE_0_V) {
1114 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1115 case STRTAB_STE_0_CFG_BYPASS:
1117 case STRTAB_STE_0_CFG_S1_TRANS:
1118 case STRTAB_STE_0_CFG_S2_TRANS:
1121 case STRTAB_STE_0_CFG_ABORT:
1125 BUG(); /* STE corruption */
1129 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1130 val = STRTAB_STE_0_V;
1133 if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
1134 if (!ste->assigned && disable_bypass)
1135 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1137 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1139 dst[0] = cpu_to_le64(val);
1140 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1141 STRTAB_STE_1_SHCFG_INCOMING));
1142 dst[2] = 0; /* Nuke the VMID */
1144 * The SMMU can perform negative caching, so we must sync
1145 * the STE regardless of whether the old value was live.
1148 arm_smmu_sync_ste_for_sid(smmu, sid);
1154 dst[1] = cpu_to_le64(
1155 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1156 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1157 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1158 #ifdef CONFIG_PCI_ATS
1159 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
1161 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1163 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1164 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1165 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1167 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1168 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
1173 dst[2] = cpu_to_le64(
1174 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
1175 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
1177 STRTAB_STE_2_S2ENDI |
1179 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1182 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1184 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1187 arm_smmu_sync_ste_for_sid(smmu, sid);
1188 /* See comment in arm_smmu_write_ctx_desc() */
1189 WRITE_ONCE(dst[0], cpu_to_le64(val));
1190 arm_smmu_sync_ste_for_sid(smmu, sid);
1192 /* It's likely that we'll want to use the new STE soon */
1193 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1194 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1197 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1200 struct arm_smmu_strtab_ent ste = { .assigned = false };
1202 for (i = 0; i < nent; ++i) {
1203 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1204 strtab += STRTAB_STE_DWORDS;
1208 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1212 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1213 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1218 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1219 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1221 desc->span = STRTAB_SPLIT + 1;
1222 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1223 GFP_KERNEL | __GFP_ZERO);
1226 "failed to allocate l2 stream table for SID %u\n",
1231 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1232 arm_smmu_write_strtab_l1_desc(strtab, desc);
1236 /* IRQ and event handlers */
1237 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1240 struct arm_smmu_device *smmu = dev;
1241 struct arm_smmu_queue *q = &smmu->evtq.q;
1242 u64 evt[EVTQ_ENT_DWORDS];
1245 while (!queue_remove_raw(q, evt)) {
1246 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1248 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1249 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1250 dev_info(smmu->dev, "\t0x%016llx\n",
1251 (unsigned long long)evt[i]);
1257 * Not much we can do on overflow, so scream and pretend we're
1260 if (queue_sync_prod(q) == -EOVERFLOW)
1261 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1262 } while (!queue_empty(q));
1264 /* Sync our overflow flag, as we believe we're up to speed */
1265 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1269 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1275 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1276 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1277 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1278 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1279 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1281 dev_info(smmu->dev, "unexpected PRI request received:\n");
1283 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1284 sid, ssid, grpid, last ? "L" : "",
1285 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1286 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1287 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1288 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1289 evt[1] & PRIQ_1_ADDR_MASK);
1292 struct arm_smmu_cmdq_ent cmd = {
1293 .opcode = CMDQ_OP_PRI_RESP,
1294 .substream_valid = ssv,
1299 .resp = PRI_RESP_DENY,
1303 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1307 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1309 struct arm_smmu_device *smmu = dev;
1310 struct arm_smmu_queue *q = &smmu->priq.q;
1311 u64 evt[PRIQ_ENT_DWORDS];
1314 while (!queue_remove_raw(q, evt))
1315 arm_smmu_handle_ppr(smmu, evt);
1317 if (queue_sync_prod(q) == -EOVERFLOW)
1318 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1319 } while (!queue_empty(q));
1321 /* Sync our overflow flag, as we believe we're up to speed */
1322 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1323 writel(q->cons, q->cons_reg);
1327 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1329 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1331 u32 gerror, gerrorn, active;
1332 struct arm_smmu_device *smmu = dev;
1334 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1335 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1337 active = gerror ^ gerrorn;
1338 if (!(active & GERROR_ERR_MASK))
1339 return IRQ_NONE; /* No errors pending */
1342 "unexpected global error reported (0x%08x), this could be serious\n",
1345 if (active & GERROR_SFM_ERR) {
1346 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1347 arm_smmu_device_disable(smmu);
1350 if (active & GERROR_MSI_GERROR_ABT_ERR)
1351 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1353 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1354 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1356 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1357 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1359 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1360 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1362 if (active & GERROR_PRIQ_ABT_ERR)
1363 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1365 if (active & GERROR_EVTQ_ABT_ERR)
1366 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1368 if (active & GERROR_CMDQ_ERR)
1369 arm_smmu_cmdq_skip_err(smmu);
1371 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1375 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1377 struct arm_smmu_device *smmu = dev;
1379 arm_smmu_evtq_thread(irq, dev);
1380 if (smmu->features & ARM_SMMU_FEAT_PRI)
1381 arm_smmu_priq_thread(irq, dev);
1386 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1388 arm_smmu_gerror_handler(irq, dev);
1389 return IRQ_WAKE_THREAD;
1392 /* IO_PGTABLE API */
1393 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1395 arm_smmu_cmdq_issue_sync(smmu);
1398 static void arm_smmu_tlb_sync(void *cookie)
1400 struct arm_smmu_domain *smmu_domain = cookie;
1401 __arm_smmu_tlb_sync(smmu_domain->smmu);
1404 static void arm_smmu_tlb_inv_context(void *cookie)
1406 struct arm_smmu_domain *smmu_domain = cookie;
1407 struct arm_smmu_device *smmu = smmu_domain->smmu;
1408 struct arm_smmu_cmdq_ent cmd;
1410 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1411 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
1412 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1415 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1416 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1419 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1420 __arm_smmu_tlb_sync(smmu);
1423 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1424 size_t granule, bool leaf, void *cookie)
1426 struct arm_smmu_domain *smmu_domain = cookie;
1427 struct arm_smmu_device *smmu = smmu_domain->smmu;
1428 struct arm_smmu_cmdq_ent cmd = {
1435 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1436 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1437 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1439 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1440 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1444 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1445 cmd.tlbi.addr += granule;
1446 } while (size -= granule);
1449 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1450 .tlb_flush_all = arm_smmu_tlb_inv_context,
1451 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
1452 .tlb_sync = arm_smmu_tlb_sync,
1456 static bool arm_smmu_capable(enum iommu_cap cap)
1459 case IOMMU_CAP_CACHE_COHERENCY:
1461 case IOMMU_CAP_NOEXEC:
1468 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1470 struct arm_smmu_domain *smmu_domain;
1472 if (type != IOMMU_DOMAIN_UNMANAGED &&
1473 type != IOMMU_DOMAIN_DMA &&
1474 type != IOMMU_DOMAIN_IDENTITY)
1478 * Allocate the domain and initialise some of its data structures.
1479 * We can't really do anything meaningful until we've added a
1482 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1486 if (type == IOMMU_DOMAIN_DMA &&
1487 iommu_get_dma_cookie(&smmu_domain->domain)) {
1492 mutex_init(&smmu_domain->init_mutex);
1493 return &smmu_domain->domain;
1496 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1498 int idx, size = 1 << span;
1501 idx = find_first_zero_bit(map, size);
1504 } while (test_and_set_bit(idx, map));
1509 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1511 clear_bit(idx, map);
1514 static void arm_smmu_domain_free(struct iommu_domain *domain)
1516 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1517 struct arm_smmu_device *smmu = smmu_domain->smmu;
1519 iommu_put_dma_cookie(domain);
1520 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1522 /* Free the CD and ASID, if we allocated them */
1523 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1524 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1527 dmam_free_coherent(smmu_domain->smmu->dev,
1528 CTXDESC_CD_DWORDS << 3,
1532 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1535 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1537 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1543 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1544 struct io_pgtable_cfg *pgtbl_cfg)
1548 struct arm_smmu_device *smmu = smmu_domain->smmu;
1549 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1551 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1555 cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1557 GFP_KERNEL | __GFP_ZERO);
1559 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1564 cfg->cd.asid = (u16)asid;
1565 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1566 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1567 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1571 arm_smmu_bitmap_free(smmu->asid_map, asid);
1575 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1576 struct io_pgtable_cfg *pgtbl_cfg)
1579 struct arm_smmu_device *smmu = smmu_domain->smmu;
1580 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1582 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1586 cfg->vmid = (u16)vmid;
1587 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1588 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1592 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1595 unsigned long ias, oas;
1596 enum io_pgtable_fmt fmt;
1597 struct io_pgtable_cfg pgtbl_cfg;
1598 struct io_pgtable_ops *pgtbl_ops;
1599 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1600 struct io_pgtable_cfg *);
1601 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1602 struct arm_smmu_device *smmu = smmu_domain->smmu;
1604 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1605 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1609 /* Restrict the stage to what we can actually support */
1610 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1611 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1612 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1613 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1615 switch (smmu_domain->stage) {
1616 case ARM_SMMU_DOMAIN_S1:
1617 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1618 ias = min_t(unsigned long, ias, VA_BITS);
1620 fmt = ARM_64_LPAE_S1;
1621 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1623 case ARM_SMMU_DOMAIN_NESTED:
1624 case ARM_SMMU_DOMAIN_S2:
1627 fmt = ARM_64_LPAE_S2;
1628 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1634 pgtbl_cfg = (struct io_pgtable_cfg) {
1635 .pgsize_bitmap = smmu->pgsize_bitmap,
1638 .tlb = &arm_smmu_gather_ops,
1639 .iommu_dev = smmu->dev,
1642 if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1643 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
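	/*
	 * The IO_PGTABLE_QUIRK_NO_DMA quirk above tells io-pgtable that the
	 * page tables are in coherent memory, so it can skip DMA API cache
	 * maintenance when updating them.
	 */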
1645 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1649 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1650 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1651 domain->geometry.force_aperture = true;
1653 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1655 free_io_pgtable_ops(pgtbl_ops);
1659 smmu_domain->pgtbl_ops = pgtbl_ops;
1663 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1666 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1668 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1669 struct arm_smmu_strtab_l1_desc *l1_desc;
1672 /* Two-level walk */
1673 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1674 l1_desc = &cfg->l1_desc[idx];
1675 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1676 step = &l1_desc->l2ptr[idx];
1678 /* Simple linear lookup */
1679 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1685 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1688 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1689 struct arm_smmu_device *smmu = master->smmu;
1691 for (i = 0; i < fwspec->num_ids; ++i) {
1692 u32 sid = fwspec->ids[i];
1693 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1695 /* Bridged PCI devices may end up with duplicated IDs */
1696 for (j = 0; j < i; j++)
1697 if (fwspec->ids[j] == sid)
1702 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1706 static void arm_smmu_detach_dev(struct device *dev)
1708 struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
1710 master->ste.assigned = false;
1711 arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1714 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1717 struct arm_smmu_device *smmu;
1718 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1719 struct arm_smmu_master_data *master;
1720 struct arm_smmu_strtab_ent *ste;
1722 if (!dev->iommu_fwspec)
1725 master = dev->iommu_fwspec->iommu_priv;
1726 smmu = master->smmu;
1729 /* Already attached to a different domain? */
1731 arm_smmu_detach_dev(dev);
1733 mutex_lock(&smmu_domain->init_mutex);
1735 if (!smmu_domain->smmu) {
1736 smmu_domain->smmu = smmu;
1737 ret = arm_smmu_domain_finalise(domain);
1739 smmu_domain->smmu = NULL;
1742 } else if (smmu_domain->smmu != smmu) {
1744 "cannot attach to SMMU %s (upstream of %s)\n",
1745 dev_name(smmu_domain->smmu->dev),
1746 dev_name(smmu->dev));
1751 ste->assigned = true;
1753 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1756 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1757 ste->s1_cfg = &smmu_domain->s1_cfg;
1759 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1762 ste->s2_cfg = &smmu_domain->s2_cfg;
1765 arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1767 mutex_unlock(&smmu_domain->init_mutex);
1771 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1772 phys_addr_t paddr, size_t size, int prot)
1774 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1779 return ops->map(ops, iova, paddr, size, prot);
1783 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1785 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1790 return ops->unmap(ops, iova, size);
1793 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1795 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1798 __arm_smmu_tlb_sync(smmu);
1802 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1804 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1806 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1812 return ops->iova_to_phys(ops, iova);
1815 static struct platform_driver arm_smmu_driver;
1817 static int arm_smmu_match_node(struct device *dev, void *data)
1819 return dev->fwnode == data;
1823 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1825 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1826 fwnode, arm_smmu_match_node);
1828 return dev ? dev_get_drvdata(dev) : NULL;
1831 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1833 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1835 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1836 limit *= 1UL << STRTAB_SPLIT;
1841 static struct iommu_ops arm_smmu_ops;
1843 static int arm_smmu_add_device(struct device *dev)
1846 struct arm_smmu_device *smmu;
1847 struct arm_smmu_master_data *master;
1848 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1849 struct iommu_group *group;
1851 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1854 * We _can_ actually withstand dodgy bus code re-calling add_device()
1855 * without an intervening remove_device()/of_xlate() sequence, but
1856 * we're not going to do so quietly...
1858 if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1859 master = fwspec->iommu_priv;
1860 smmu = master->smmu;
1862 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1865 master = kzalloc(sizeof(*master), GFP_KERNEL);
1869 master->smmu = smmu;
1870 fwspec->iommu_priv = master;
1873 /* Check the SIDs are in range of the SMMU and our stream table */
1874 for (i = 0; i < fwspec->num_ids; i++) {
1875 u32 sid = fwspec->ids[i];
1877 if (!arm_smmu_sid_in_range(smmu, sid))
1880 /* Ensure l2 strtab is initialised */
1881 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1882 ret = arm_smmu_init_l2_strtab(smmu, sid);
1888 group = iommu_group_get_for_dev(dev);
1889 if (!IS_ERR(group)) {
1890 iommu_group_put(group);
1891 iommu_device_link(&smmu->iommu, dev);
1894 return PTR_ERR_OR_ZERO(group);
1897 static void arm_smmu_remove_device(struct device *dev)
1899 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1900 struct arm_smmu_master_data *master;
1901 struct arm_smmu_device *smmu;
1903 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1906 master = fwspec->iommu_priv;
1907 smmu = master->smmu;
1908 if (master && master->ste.assigned)
1909 arm_smmu_detach_dev(dev);
1910 iommu_group_remove_device(dev);
1911 iommu_device_unlink(&smmu->iommu, dev);
1913 iommu_fwspec_free(dev);
1916 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1918 struct iommu_group *group;
1921 * We don't support devices sharing stream IDs other than PCI RID
1922 * aliases, since the necessary ID-to-device lookup becomes rather
1923 * impractical given a potentially sparse 32-bit stream ID space.
1925 if (dev_is_pci(dev))
1926 group = pci_device_group(dev);
1928 group = generic_device_group(dev);
1933 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1934 enum iommu_attr attr, void *data)
1936 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1938 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1942 case DOMAIN_ATTR_NESTING:
1943 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1950 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1951 enum iommu_attr attr, void *data)
1954 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1956 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1959 mutex_lock(&smmu_domain->init_mutex);
1962 case DOMAIN_ATTR_NESTING:
1963 if (smmu_domain->smmu) {
1969 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1971 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1979 mutex_unlock(&smmu_domain->init_mutex);
1983 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1985 return iommu_fwspec_add_ids(dev, args->args, 1);
1988 static void arm_smmu_get_resv_regions(struct device *dev,
1989 struct list_head *head)
1991 struct iommu_resv_region *region;
1992 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1994 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1995 prot, IOMMU_RESV_SW_MSI);
1999 list_add_tail(®ion->list, head);
2001 iommu_dma_get_resv_regions(dev, head);
2004 static void arm_smmu_put_resv_regions(struct device *dev,
2005 struct list_head *head)
2007 struct iommu_resv_region *entry, *next;
2009 list_for_each_entry_safe(entry, next, head, list)
2013 static struct iommu_ops arm_smmu_ops = {
2014 .capable = arm_smmu_capable,
2015 .domain_alloc = arm_smmu_domain_alloc,
2016 .domain_free = arm_smmu_domain_free,
2017 .attach_dev = arm_smmu_attach_dev,
2018 .map = arm_smmu_map,
2019 .unmap = arm_smmu_unmap,
2020 .flush_iotlb_all = arm_smmu_iotlb_sync,
2021 .iotlb_sync = arm_smmu_iotlb_sync,
2022 .iova_to_phys = arm_smmu_iova_to_phys,
2023 .add_device = arm_smmu_add_device,
2024 .remove_device = arm_smmu_remove_device,
2025 .device_group = arm_smmu_device_group,
2026 .domain_get_attr = arm_smmu_domain_get_attr,
2027 .domain_set_attr = arm_smmu_domain_set_attr,
2028 .of_xlate = arm_smmu_of_xlate,
2029 .get_resv_regions = arm_smmu_get_resv_regions,
2030 .put_resv_regions = arm_smmu_put_resv_regions,
2031 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2034 /* Probing and initialisation functions */
2035 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2036 struct arm_smmu_queue *q,
2037 unsigned long prod_off,
2038 unsigned long cons_off,
2041 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2043 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2045 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2050 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2051 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2052 q->ent_dwords = dwords;
2054 q->q_base = Q_BASE_RWA;
2055 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2056 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2058 q->prod = q->cons = 0;
2062 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2067 spin_lock_init(&smmu->cmdq.lock);
2068 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2069 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2074 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2075 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2080 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2083 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2084 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2087 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2090 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2091 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2092 void *strtab = smmu->strtab_cfg.strtab;
2094 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2095 if (!cfg->l1_desc) {
2096 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2100 for (i = 0; i < cfg->num_l1_ents; ++i) {
2101 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2102 strtab += STRTAB_L1_DESC_DWORDS << 3;
2108 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2113 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2115 /* Calculate the L1 size, capped to the SIDSIZE. */
2116 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2117 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2118 cfg->num_l1_ents = 1 << size;
2120 size += STRTAB_SPLIT;
2121 if (size < smmu->sid_bits)
2123 "2-level strtab only covers %u/%u bits of SID\n",
2124 size, smmu->sid_bits);
2126 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2127 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2128 GFP_KERNEL | __GFP_ZERO);
2131 "failed to allocate l1 stream table (%u bytes)\n",
2135 cfg->strtab = strtab;
2137 /* Configure strtab_base_cfg for 2 levels */
2138 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2139 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2140 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2141 cfg->strtab_base_cfg = reg;
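	/*
	 * For example, with IDR1.SIDSIZE == 16: size = min(17, 16 - 8) = 8,
	 * so 256 L1 descriptors (2KB) are allocated and LOG2SIZE is programmed
	 * as 8 + STRTAB_SPLIT = 16, covering the whole StreamID space.
	 */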
2143 return arm_smmu_init_l1_strtab(smmu);
2146 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2151 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2153 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2154 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2155 GFP_KERNEL | __GFP_ZERO);
2158 "failed to allocate linear stream table (%u bytes)\n",
2162 cfg->strtab = strtab;
2163 cfg->num_l1_ents = 1 << smmu->sid_bits;
2165 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2166 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2167 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2168 cfg->strtab_base_cfg = reg;
2170 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2174 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2179 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2180 ret = arm_smmu_init_strtab_2lvl(smmu);
2182 ret = arm_smmu_init_strtab_linear(smmu);
2187 /* Set the strtab base address */
2188 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2189 reg |= STRTAB_BASE_RA;
2190 smmu->strtab_cfg.strtab_base = reg;
2192 /* Allocate the first VMID for stage-2 bypass STEs */
2193 set_bit(0, smmu->vmid_map);
2197 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2201 ret = arm_smmu_init_queues(smmu);
2205 return arm_smmu_init_strtab(smmu);
2208 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2209 unsigned int reg_off, unsigned int ack_off)
2213 writel_relaxed(val, smmu->base + reg_off);
2214 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2215 1, ARM_SMMU_POLL_TIMEOUT_US);
2218 /* GBPA is "special" */
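/*
 * Updates are handshaked via GBPA.UPDATE: wait for any previous update to
 * complete, write the new value with UPDATE set, then wait for the SMMU to
 * clear UPDATE again. This is why GBPA can't go through
 * arm_smmu_write_reg_sync() like the other registers.
 */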
2219 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2222 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2224 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2225 1, ARM_SMMU_POLL_TIMEOUT_US);
2231 writel_relaxed(reg | GBPA_UPDATE, gbpa);
2232 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2233 1, ARM_SMMU_POLL_TIMEOUT_US);
2236 dev_err(smmu->dev, "GBPA not responding to update\n");
2240 static void arm_smmu_free_msis(void *data)
2242 struct device *dev = data;
2243 platform_msi_domain_free_irqs(dev);
2246 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2248 phys_addr_t doorbell;
2249 struct device *dev = msi_desc_to_dev(desc);
2250 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2251 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
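	/*
	 * Each queue/GERROR MSI has a three-register block (see
	 * arm_smmu_msi_cfg): the 64-bit doorbell address in CFG0, the payload
	 * in CFG1 and the shareability/memory attributes in CFG2.
	 */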
2253 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2254 doorbell &= MSI_CFG0_ADDR_MASK;
2256 writeq_relaxed(doorbell, smmu->base + cfg[0]);
2257 writel_relaxed(msg->data, smmu->base + cfg[1]);
2258 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2261 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2263 struct msi_desc *desc;
2264 int ret, nvec = ARM_SMMU_MAX_MSIS;
2265 struct device *dev = smmu->dev;
2267 /* Clear the MSI address regs */
2268 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2269 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2271 if (smmu->features & ARM_SMMU_FEAT_PRI)
2272 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2276 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2279 if (!dev->msi_domain) {
2280 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2284 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2285 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2287 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2291 for_each_msi_entry(desc, dev) {
2292 switch (desc->platform.msi_index) {
2293 case EVTQ_MSI_INDEX:
2294 smmu->evtq.q.irq = desc->irq;
2296 case GERROR_MSI_INDEX:
2297 smmu->gerr_irq = desc->irq;
2299 case PRIQ_MSI_INDEX:
2300 smmu->priq.q.irq = desc->irq;
2302 default: /* Unknown */
2307 /* Add callback to free MSIs on teardown */
2308 devm_add_action(dev, arm_smmu_free_msis, dev);
2311 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2315 arm_smmu_setup_msis(smmu);
2317 /* Request interrupt lines */
2318 irq = smmu->evtq.q.irq;
2320 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2321 arm_smmu_evtq_thread,
2323 "arm-smmu-v3-evtq", smmu);
2325 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2327 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2330 irq = smmu->gerr_irq;
2332 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2333 0, "arm-smmu-v3-gerror", smmu);
2335 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2337 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2340 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2341 irq = smmu->priq.q.irq;
2343 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2344 arm_smmu_priq_thread,
2350 "failed to enable priq irq\n");
2352 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2357 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2360 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2362 /* Disable IRQs first */
2363 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2364 ARM_SMMU_IRQ_CTRLACK);
2366 dev_err(smmu->dev, "failed to disable irqs\n");
2370 irq = smmu->combined_irq;
2373 * Cavium ThunderX2 implementation doesn't support unique
2374 * irq lines. Use a single irq line for all the SMMUv3 interrupts.
2376 ret = devm_request_threaded_irq(smmu->dev, irq,
2377 arm_smmu_combined_irq_handler,
2378 arm_smmu_combined_irq_thread,
2380 "arm-smmu-v3-combined-irq", smmu);
2382 dev_warn(smmu->dev, "failed to enable combined irq\n");
2384 arm_smmu_setup_unique_irqs(smmu);
2386 if (smmu->features & ARM_SMMU_FEAT_PRI)
2387 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2389 /* Enable interrupt generation on the SMMU */
2390 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2391 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2393 dev_warn(smmu->dev, "failed to enable irqs\n");
2398 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2402 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2404 dev_err(smmu->dev, "failed to clear cr0\n");
2409 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2413 struct arm_smmu_cmdq_ent cmd;
2415 /* Clear CR0 and sync (disables SMMU and queue processing) */
2416 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2417 if (reg & CR0_SMMUEN) {
2418 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2419 WARN_ON(is_kdump_kernel() && !disable_bypass);
2420 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2423 ret = arm_smmu_device_disable(smmu);
2427 /* CR1 (table and queue memory attributes) */
2428 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2429 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2430 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2431 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2432 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2433 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2434 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
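	/*
	 * The value programmed above makes structure and queue accesses
	 * inner-shareable (ARM_SMMU_SH_ISH) and write-back cacheable for
	 * both the inner (IC) and outer (OC) cacheability domains.
	 */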
2436 /* CR2 (random crap) */
2437 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2438 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
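	/*
	 * Roughly (per the SMMUv3 spec): PTM keeps the SMMU's TLBs out of
	 * broadcast TLB maintenance, RECINVSID records events for
	 * transactions with invalid StreamIDs, and E2H selects the EL2 host
	 * (VHE) translation scheme.
	 */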
2441 writeq_relaxed(smmu->strtab_cfg.strtab_base,
2442 smmu->base + ARM_SMMU_STRTAB_BASE);
2443 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2444 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2447 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2448 writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2449 writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2451 enables = CR0_CMDQEN;
2452 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2455 dev_err(smmu->dev, "failed to enable command queue\n");
2459 /* Invalidate any cached configuration */
2460 cmd.opcode = CMDQ_OP_CFGI_ALL;
2461 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2462 arm_smmu_cmdq_issue_sync(smmu);
2464 /* Invalidate any stale TLB entries */
2465 if (smmu->features & ARM_SMMU_FEAT_HYP) {
2466 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2467 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2470 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2471 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2472 arm_smmu_cmdq_issue_sync(smmu);
2475 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2476 writel_relaxed(smmu->evtq.q.prod,
2477 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2478 writel_relaxed(smmu->evtq.q.cons,
2479 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2481 enables |= CR0_EVTQEN;
2482 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2485 dev_err(smmu->dev, "failed to enable event queue\n");
2490 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2491 writeq_relaxed(smmu->priq.q.q_base,
2492 smmu->base + ARM_SMMU_PRIQ_BASE);
2493 writel_relaxed(smmu->priq.q.prod,
2494 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2495 writel_relaxed(smmu->priq.q.cons,
2496 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2498 enables |= CR0_PRIQEN;
2499 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2502 dev_err(smmu->dev, "failed to enable PRI queue\n");
2507 ret = arm_smmu_setup_irqs(smmu);
2509 dev_err(smmu->dev, "failed to setup irqs\n");
2513 if (is_kdump_kernel())
2514 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
2516 /* Enable the SMMU interface, or ensure bypass */
2517 if (!bypass || disable_bypass) {
2518 enables |= CR0_SMMUEN;
2520 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2524 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2527 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2534 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2537 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2540 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2542 /* 2-level structures */
2543 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2544 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2546 if (reg & IDR0_CD2L)
2547 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2550 * Translation table endianness.
2551 * We currently require the same endianness as the CPU, but this
2552 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2554 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2555 case IDR0_TTENDIAN_MIXED:
2556 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2559 case IDR0_TTENDIAN_BE:
2560 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2563 case IDR0_TTENDIAN_LE:
2564 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2568 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2572 /* Boolean feature flags */
2573 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2574 smmu->features |= ARM_SMMU_FEAT_PRI;
2576 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2577 smmu->features |= ARM_SMMU_FEAT_ATS;
2580 smmu->features |= ARM_SMMU_FEAT_SEV;
2583 smmu->features |= ARM_SMMU_FEAT_MSI;
2586 smmu->features |= ARM_SMMU_FEAT_HYP;
2589 * The coherency feature as set by FW is used in preference to the ID
2590 * register, but warn on mismatch.
2592 if (!!(reg & IDR0_COHACC) != coherent)
2593 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2594 coherent ? "true" : "false");
2596 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2597 case IDR0_STALL_MODEL_FORCE:
2598 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2600 case IDR0_STALL_MODEL_STALL:
2601 smmu->features |= ARM_SMMU_FEAT_STALLS;
2605 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2608 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2610 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2611 dev_err(smmu->dev, "no translation support!\n");
2615 /* We only support the AArch64 table format at present */
2616 switch (FIELD_GET(IDR0_TTF, reg)) {
2617 case IDR0_TTF_AARCH32_64:
2620 case IDR0_TTF_AARCH64:
2623 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2627 /* ASID/VMID sizes */
2628 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2629 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2632 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2633 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2634 dev_err(smmu->dev, "embedded implementation not supported\n");
2638 /* Queue sizes, capped at 4k */
2639 smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2640 FIELD_GET(IDR1_CMDQS, reg));
2641 if (!smmu->cmdq.q.max_n_shift) {
2642 /* Odd alignment restrictions on the base, so ignore for now */
2643 dev_err(smmu->dev, "unit-length command queue not supported\n");
2647 smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2648 FIELD_GET(IDR1_EVTQS, reg));
2649 smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2650 FIELD_GET(IDR1_PRIQS, reg));
2652 /* SID/SSID sizes */
2653 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2654 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2657 * If the SMMU supports fewer bits than would fill a single L2 stream
2658 * table, use a linear table instead.
2660 if (smmu->sid_bits <= STRTAB_SPLIT)
2661 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
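	/*
	 * Example (assuming the driver's usual STRTAB_SPLIT of 8): an SMMU
	 * with sid_bits <= 8 fits every StreamID into one 256-entry
	 * L2-sized table, so a linear stream table is used even if 2-level
	 * support was advertised.
	 */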
2664 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2666 /* Maximum number of outstanding stalls */
2667 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2670 if (reg & IDR5_GRAN64K)
2671 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2672 if (reg & IDR5_GRAN16K)
2673 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2674 if (reg & IDR5_GRAN4K)
2675 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
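	/*
	 * e.g. a 4K-granule-only implementation ends up with 4K pages plus
	 * 2M and 1G block mappings; 16K adds 32M blocks and 64K adds 512M
	 * blocks.
	 */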
2677 /* Input address size */
2678 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2679 smmu->features |= ARM_SMMU_FEAT_VAX;
2681 /* Output address size */
2682 switch (FIELD_GET(IDR5_OAS, reg)) {
2683 case IDR5_OAS_32_BIT:
2686 case IDR5_OAS_36_BIT:
2689 case IDR5_OAS_40_BIT:
2692 case IDR5_OAS_42_BIT:
2695 case IDR5_OAS_44_BIT:
2698 case IDR5_OAS_52_BIT:
2700 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2704 "unknown output address size. Truncating to 48-bit\n");
2706 case IDR5_OAS_48_BIT:
2710 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2711 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2713 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2715 /* Set the DMA mask for our table walker */
2716 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2718 "failed to set DMA mask for table walker\n");
2720 smmu->ias = max(smmu->ias, smmu->oas);
2722 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2723 smmu->ias, smmu->oas, smmu->features);
2728 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2731 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2732 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2734 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2735 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2739 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2742 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2743 struct arm_smmu_device *smmu)
2745 struct acpi_iort_smmu_v3 *iort_smmu;
2746 struct device *dev = smmu->dev;
2747 struct acpi_iort_node *node;
2749 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2751 /* Retrieve SMMUv3 specific data */
2752 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2754 acpi_smmu_get_options(iort_smmu->model, smmu);
2756 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2757 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2762 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2763 struct arm_smmu_device *smmu)
2769 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2770 struct arm_smmu_device *smmu)
2772 struct device *dev = &pdev->dev;
2776 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2777 dev_err(dev, "missing #iommu-cells property\n");
2778 else if (cells != 1)
2779 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2783 parse_driver_options(smmu);
2785 if (of_dma_is_coherent(dev->of_node))
2786 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2791 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2793 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2799 static int arm_smmu_device_probe(struct platform_device *pdev)
2802 struct resource *res;
2803 resource_size_t ioaddr;
2804 struct arm_smmu_device *smmu;
2805 struct device *dev = &pdev->dev;
2808 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2810 dev_err(dev, "failed to allocate arm_smmu_device\n");
2816 ret = arm_smmu_device_dt_probe(pdev, smmu);
2818 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2823 /* Set bypass mode according to firmware probing result */
2827 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2828 if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2829 dev_err(dev, "MMIO region too small (%pr)\n", res);
2832 ioaddr = res->start;
2834 smmu->base = devm_ioremap_resource(dev, res);
2835 if (IS_ERR(smmu->base))
2836 return PTR_ERR(smmu->base);
2838 /* Interrupt lines */
2840 irq = platform_get_irq_byname(pdev, "combined");
2842 smmu->combined_irq = irq;
2844 irq = platform_get_irq_byname(pdev, "eventq");
2846 smmu->evtq.q.irq = irq;
2848 irq = platform_get_irq_byname(pdev, "priq");
2850 smmu->priq.q.irq = irq;
2852 irq = platform_get_irq_byname(pdev, "gerror");
2854 smmu->gerr_irq = irq;
2857 ret = arm_smmu_device_hw_probe(smmu);
2861 /* Initialise in-memory data structures */
2862 ret = arm_smmu_init_structures(smmu);
2866 /* Record our private device structure */
2867 platform_set_drvdata(pdev, smmu);
2869 /* Reset the device */
2870 ret = arm_smmu_device_reset(smmu, bypass);
2874 /* And we're up. Go go go! */
2875 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2876 "smmu3.%pa", &ioaddr);
2880 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2881 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2883 ret = iommu_device_register(&smmu->iommu);
2885 dev_err(dev, "Failed to register iommu\n");
2890 if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2892 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2897 #ifdef CONFIG_ARM_AMBA
2898 if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2899 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2904 if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2905 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2912 static int arm_smmu_device_remove(struct platform_device *pdev)
2914 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2916 arm_smmu_device_disable(smmu);
2921 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2923 arm_smmu_device_remove(pdev);
2926 static const struct of_device_id arm_smmu_of_match[] = {
2927 { .compatible = "arm,smmu-v3", },
2930 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2932 static struct platform_driver arm_smmu_driver = {
2934 .name = "arm-smmu-v3",
2935 .of_match_table = of_match_ptr(arm_smmu_of_match),
2937 .probe = arm_smmu_device_probe,
2938 .remove = arm_smmu_device_remove,
2939 .shutdown = arm_smmu_device_shutdown,
2941 module_platform_driver(arm_smmu_driver);
2943 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2944 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2945 MODULE_LICENSE("GPL v2");