GNU Linux-libre 5.10.217-gnu1
[releases.git] / drivers / iommu / intel / dmar.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34
35 #include "../irq_remapping.h"
36
37 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38 struct dmar_res_callback {
39         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
40         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
41         bool                    ignore_unhandled;
42         bool                    print_entry;
43 };
44
45 /*
46  * Assumptions:
47  * 1) The hotplug framework guarentees that DMAR unit will be hot-added
48  *    before IO devices managed by that unit.
49  * 2) The hotplug framework guarantees that DMAR unit will be hot-removed
50  *    after IO devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
57 DECLARE_RWSEM(dmar_global_lock);
58 LIST_HEAD(dmar_drhd_units);
59
60 struct acpi_table_header * __initdata dmar_tbl;
61 static int dmar_dev_scope_status = 1;
62 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66
67 extern const struct iommu_ops intel_iommu_ops;
68
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71         /*
72          * add INCLUDE_ALL at the tail, so scan the list will find it at
73          * the very end.
74          */
75         if (drhd->include_all)
76                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77         else
78                 list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83         struct acpi_dmar_device_scope *scope;
84
85         *cnt = 0;
86         while (start < end) {
87                 scope = start;
88                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91                         (*cnt)++;
92                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94                         pr_warn("Unsupported device scope\n");
95                 }
96                 start += scope->length;
97         }
98         if (*cnt == 0)
99                 return NULL;
100
101         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106         int i;
107         struct device *tmp_dev;
108
109         if (*devices && *cnt) {
110                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111                         put_device(tmp_dev);
112                 kfree(*devices);
113         }
114
115         *devices = NULL;
116         *cnt = 0;
117 }
118
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125         int level = 0;
126         size_t size;
127         struct pci_dev *tmp;
128         struct dmar_pci_notify_info *info;
129
130         BUG_ON(dev->is_virtfn);
131
132         /*
133          * Ignore devices that have a domain number higher than what can
134          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135          */
136         if (pci_domain_nr(dev->bus) > U16_MAX)
137                 return NULL;
138
139         /* Only generate path[] for device addition event */
140         if (event == BUS_NOTIFY_ADD_DEVICE)
141                 for (tmp = dev; tmp; tmp = tmp->bus->self)
142                         level++;
143
144         size = struct_size(info, path, level);
145         if (size <= sizeof(dmar_pci_notify_info_buf)) {
146                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147         } else {
148                 info = kzalloc(size, GFP_KERNEL);
149                 if (!info) {
150                         pr_warn("Out of memory when allocating notify_info "
151                                 "for %s.\n", pci_name(dev));
152                         if (dmar_dev_scope_status == 0)
153                                 dmar_dev_scope_status = -ENOMEM;
154                         return NULL;
155                 }
156         }
157
158         info->event = event;
159         info->dev = dev;
160         info->seg = pci_domain_nr(dev->bus);
161         info->level = level;
162         if (event == BUS_NOTIFY_ADD_DEVICE) {
163                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
164                         level--;
165                         info->path[level].bus = tmp->bus->number;
166                         info->path[level].device = PCI_SLOT(tmp->devfn);
167                         info->path[level].function = PCI_FUNC(tmp->devfn);
168                         if (pci_is_root_bus(tmp->bus))
169                                 info->bus = tmp->bus->number;
170                 }
171         }
172
173         return info;
174 }
175
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178         if ((void *)info != dmar_pci_notify_info_buf)
179                 kfree(info);
180 }
181
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183                                 struct acpi_dmar_pci_path *path, int count)
184 {
185         int i;
186
187         if (info->bus != bus)
188                 goto fallback;
189         if (info->level != count)
190                 goto fallback;
191
192         for (i = 0; i < count; i++) {
193                 if (path[i].device != info->path[i].device ||
194                     path[i].function != info->path[i].function)
195                         goto fallback;
196         }
197
198         return true;
199
200 fallback:
201
202         if (count != 1)
203                 return false;
204
205         i = info->level - 1;
206         if (bus              == info->path[i].bus &&
207             path[0].device   == info->path[i].device &&
208             path[0].function == info->path[i].function) {
209                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210                         bus, path[0].device, path[0].function);
211                 return true;
212         }
213
214         return false;
215 }
216
217 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219                           void *start, void*end, u16 segment,
220                           struct dmar_dev_scope *devices,
221                           int devices_cnt)
222 {
223         int i, level;
224         struct device *tmp, *dev = &info->dev->dev;
225         struct acpi_dmar_device_scope *scope;
226         struct acpi_dmar_pci_path *path;
227
228         if (segment != info->seg)
229                 return 0;
230
231         for (; start < end; start += scope->length) {
232                 scope = start;
233                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235                         continue;
236
237                 path = (struct acpi_dmar_pci_path *)(scope + 1);
238                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
239                 if (!dmar_match_pci_path(info, scope->bus, path, level))
240                         continue;
241
242                 /*
243                  * We expect devices with endpoint scope to have normal PCI
244                  * headers, and devices with bridge scope to have bridge PCI
245                  * headers.  However PCI NTB devices may be listed in the
246                  * DMAR table with bridge scope, even though they have a
247                  * normal PCI header.  NTB devices are identified by class
248                  * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
249                  * for this special case.
250                  */
251                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256                         pr_warn("Device scope type does not match for %s\n",
257                                 pci_name(info->dev));
258                         return -EINVAL;
259                 }
260
261                 for_each_dev_scope(devices, devices_cnt, i, tmp)
262                         if (tmp == NULL) {
263                                 devices[i].bus = info->dev->bus->number;
264                                 devices[i].devfn = info->dev->devfn;
265                                 rcu_assign_pointer(devices[i].dev,
266                                                    get_device(dev));
267                                 return 1;
268                         }
269                 BUG_ON(i >= devices_cnt);
270         }
271
272         return 0;
273 }
274
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276                           struct dmar_dev_scope *devices, int count)
277 {
278         int index;
279         struct device *tmp;
280
281         if (info->seg != segment)
282                 return 0;
283
284         for_each_active_dev_scope(devices, count, index, tmp)
285                 if (tmp == &info->dev->dev) {
286                         RCU_INIT_POINTER(devices[index].dev, NULL);
287                         synchronize_rcu();
288                         put_device(tmp);
289                         return 1;
290                 }
291
292         return 0;
293 }
294
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297         int ret = 0;
298         struct dmar_drhd_unit *dmaru;
299         struct acpi_dmar_hardware_unit *drhd;
300
301         for_each_drhd_unit(dmaru) {
302                 if (dmaru->include_all)
303                         continue;
304
305                 drhd = container_of(dmaru->hdr,
306                                     struct acpi_dmar_hardware_unit, header);
307                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308                                 ((void *)drhd) + drhd->header.length,
309                                 dmaru->segment,
310                                 dmaru->devices, dmaru->devices_cnt);
311                 if (ret)
312                         break;
313         }
314         if (ret >= 0)
315                 ret = dmar_iommu_notify_scope_dev(info);
316         if (ret < 0 && dmar_dev_scope_status == 0)
317                 dmar_dev_scope_status = ret;
318
319         if (ret >= 0)
320                 intel_irq_remap_add_device(info);
321
322         return ret;
323 }
324
325 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
326 {
327         struct dmar_drhd_unit *dmaru;
328
329         for_each_drhd_unit(dmaru)
330                 if (dmar_remove_dev_scope(info, dmaru->segment,
331                         dmaru->devices, dmaru->devices_cnt))
332                         break;
333         dmar_iommu_notify_scope_dev(info);
334 }
335
336 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
337 {
338         struct pci_dev *physfn = pci_physfn(pdev);
339
340         dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
341 }
342
343 static int dmar_pci_bus_notifier(struct notifier_block *nb,
344                                  unsigned long action, void *data)
345 {
346         struct pci_dev *pdev = to_pci_dev(data);
347         struct dmar_pci_notify_info *info;
348
349         /* Only care about add/remove events for physical functions.
350          * For VFs we actually do the lookup based on the corresponding
351          * PF in device_to_iommu() anyway. */
352         if (pdev->is_virtfn) {
353                 /*
354                  * Ensure that the VF device inherits the irq domain of the
355                  * PF device. Ideally the device would inherit the domain
356                  * from the bus, but DMAR can have multiple units per bus
357                  * which makes this impossible. The VF 'bus' could inherit
358                  * from the PF device, but that's yet another x86'sism to
359                  * inflict on everybody else.
360                  */
361                 if (action == BUS_NOTIFY_ADD_DEVICE)
362                         vf_inherit_msi_domain(pdev);
363                 return NOTIFY_DONE;
364         }
365
366         if (action != BUS_NOTIFY_ADD_DEVICE &&
367             action != BUS_NOTIFY_REMOVED_DEVICE)
368                 return NOTIFY_DONE;
369
370         info = dmar_alloc_pci_notify_info(pdev, action);
371         if (!info)
372                 return NOTIFY_DONE;
373
374         down_write(&dmar_global_lock);
375         if (action == BUS_NOTIFY_ADD_DEVICE)
376                 dmar_pci_bus_add_dev(info);
377         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
378                 dmar_pci_bus_del_dev(info);
379         up_write(&dmar_global_lock);
380
381         dmar_free_pci_notify_info(info);
382
383         return NOTIFY_OK;
384 }
385
386 static struct notifier_block dmar_pci_bus_nb = {
387         .notifier_call = dmar_pci_bus_notifier,
388         .priority = 1,
389 };
390
391 static struct dmar_drhd_unit *
392 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
393 {
394         struct dmar_drhd_unit *dmaru;
395
396         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
397                                 dmar_rcu_check())
398                 if (dmaru->segment == drhd->segment &&
399                     dmaru->reg_base_addr == drhd->address)
400                         return dmaru;
401
402         return NULL;
403 }
404
405 /*
406  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
407  * structure which uniquely represent one DMA remapping hardware unit
408  * present in the platform
409  */
410 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
411 {
412         struct acpi_dmar_hardware_unit *drhd;
413         struct dmar_drhd_unit *dmaru;
414         int ret;
415
416         drhd = (struct acpi_dmar_hardware_unit *)header;
417         dmaru = dmar_find_dmaru(drhd);
418         if (dmaru)
419                 goto out;
420
421         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
422         if (!dmaru)
423                 return -ENOMEM;
424
425         /*
426          * If header is allocated from slab by ACPI _DSM method, we need to
427          * copy the content because the memory buffer will be freed on return.
428          */
429         dmaru->hdr = (void *)(dmaru + 1);
430         memcpy(dmaru->hdr, header, header->length);
431         dmaru->reg_base_addr = drhd->address;
432         dmaru->segment = drhd->segment;
433         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
434         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
435                                               ((void *)drhd) + drhd->header.length,
436                                               &dmaru->devices_cnt);
437         if (dmaru->devices_cnt && dmaru->devices == NULL) {
438                 kfree(dmaru);
439                 return -ENOMEM;
440         }
441
442         ret = alloc_iommu(dmaru);
443         if (ret) {
444                 dmar_free_dev_scope(&dmaru->devices,
445                                     &dmaru->devices_cnt);
446                 kfree(dmaru);
447                 return ret;
448         }
449         dmar_register_drhd_unit(dmaru);
450
451 out:
452         if (arg)
453                 (*(int *)arg)++;
454
455         return 0;
456 }
457
458 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
459 {
460         if (dmaru->devices && dmaru->devices_cnt)
461                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
462         if (dmaru->iommu)
463                 free_iommu(dmaru->iommu);
464         kfree(dmaru);
465 }
466
467 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
468                                       void *arg)
469 {
470         struct acpi_dmar_andd *andd = (void *)header;
471
472         /* Check for NUL termination within the designated length */
473         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
474                 pr_warn(FW_BUG
475                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
476                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
477                            dmi_get_system_info(DMI_BIOS_VENDOR),
478                            dmi_get_system_info(DMI_BIOS_VERSION),
479                            dmi_get_system_info(DMI_PRODUCT_VERSION));
480                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
481                 return -EINVAL;
482         }
483         pr_info("ANDD device: %x name: %s\n", andd->device_number,
484                 andd->device_name);
485
486         return 0;
487 }
488
489 #ifdef CONFIG_ACPI_NUMA
490 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
491 {
492         struct acpi_dmar_rhsa *rhsa;
493         struct dmar_drhd_unit *drhd;
494
495         rhsa = (struct acpi_dmar_rhsa *)header;
496         for_each_drhd_unit(drhd) {
497                 if (drhd->reg_base_addr == rhsa->base_address) {
498                         int node = pxm_to_node(rhsa->proximity_domain);
499
500                         if (node != NUMA_NO_NODE && !node_online(node))
501                                 node = NUMA_NO_NODE;
502                         drhd->iommu->node = node;
503                         return 0;
504                 }
505         }
506         pr_warn(FW_BUG
507                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
508                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
509                 rhsa->base_address,
510                 dmi_get_system_info(DMI_BIOS_VENDOR),
511                 dmi_get_system_info(DMI_BIOS_VERSION),
512                 dmi_get_system_info(DMI_PRODUCT_VERSION));
513         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
514
515         return 0;
516 }
517 #else
518 #define dmar_parse_one_rhsa             dmar_res_noop
519 #endif
520
521 static void
522 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
523 {
524         struct acpi_dmar_hardware_unit *drhd;
525         struct acpi_dmar_reserved_memory *rmrr;
526         struct acpi_dmar_atsr *atsr;
527         struct acpi_dmar_rhsa *rhsa;
528
529         switch (header->type) {
530         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
531                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
532                                     header);
533                 pr_info("DRHD base: %#016Lx flags: %#x\n",
534                         (unsigned long long)drhd->address, drhd->flags);
535                 break;
536         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
537                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
538                                     header);
539                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
540                         (unsigned long long)rmrr->base_address,
541                         (unsigned long long)rmrr->end_address);
542                 break;
543         case ACPI_DMAR_TYPE_ROOT_ATS:
544                 atsr = container_of(header, struct acpi_dmar_atsr, header);
545                 pr_info("ATSR flags: %#x\n", atsr->flags);
546                 break;
547         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
548                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
549                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
550                        (unsigned long long)rhsa->base_address,
551                        rhsa->proximity_domain);
552                 break;
553         case ACPI_DMAR_TYPE_NAMESPACE:
554                 /* We don't print this here because we need to sanity-check
555                    it first. So print it in dmar_parse_one_andd() instead. */
556                 break;
557         }
558 }
559
560 /**
561  * dmar_table_detect - checks to see if the platform supports DMAR devices
562  */
563 static int __init dmar_table_detect(void)
564 {
565         acpi_status status = AE_OK;
566
567         /* if we could find DMAR table, then there are DMAR devices */
568         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
569
570         if (ACPI_SUCCESS(status) && !dmar_tbl) {
571                 pr_warn("Unable to map DMAR\n");
572                 status = AE_NOT_FOUND;
573         }
574
575         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
576 }
577
578 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
579                                        size_t len, struct dmar_res_callback *cb)
580 {
581         struct acpi_dmar_header *iter, *next;
582         struct acpi_dmar_header *end = ((void *)start) + len;
583
584         for (iter = start; iter < end; iter = next) {
585                 next = (void *)iter + iter->length;
586                 if (iter->length == 0) {
587                         /* Avoid looping forever on bad ACPI tables */
588                         pr_debug(FW_BUG "Invalid 0-length structure\n");
589                         break;
590                 } else if (next > end) {
591                         /* Avoid passing table end */
592                         pr_warn(FW_BUG "Record passes table end\n");
593                         return -EINVAL;
594                 }
595
596                 if (cb->print_entry)
597                         dmar_table_print_dmar_entry(iter);
598
599                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
600                         /* continue for forward compatibility */
601                         pr_debug("Unknown DMAR structure type %d\n",
602                                  iter->type);
603                 } else if (cb->cb[iter->type]) {
604                         int ret;
605
606                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
607                         if (ret)
608                                 return ret;
609                 } else if (!cb->ignore_unhandled) {
610                         pr_warn("No handler for DMAR structure type %d\n",
611                                 iter->type);
612                         return -EINVAL;
613                 }
614         }
615
616         return 0;
617 }
618
619 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
620                                        struct dmar_res_callback *cb)
621 {
622         return dmar_walk_remapping_entries((void *)(dmar + 1),
623                         dmar->header.length - sizeof(*dmar), cb);
624 }
625
626 /**
627  * parse_dmar_table - parses the DMA reporting table
628  */
629 static int __init
630 parse_dmar_table(void)
631 {
632         struct acpi_table_dmar *dmar;
633         int drhd_count = 0;
634         int ret;
635         struct dmar_res_callback cb = {
636                 .print_entry = true,
637                 .ignore_unhandled = true,
638                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
639                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
640                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
641                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
642                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
643                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
644         };
645
646         /*
647          * Do it again, earlier dmar_tbl mapping could be mapped with
648          * fixed map.
649          */
650         dmar_table_detect();
651
652         /*
653          * ACPI tables may not be DMA protected by tboot, so use DMAR copy
654          * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
655          */
656         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
657
658         dmar = (struct acpi_table_dmar *)dmar_tbl;
659         if (!dmar)
660                 return -ENODEV;
661
662         if (dmar->width < PAGE_SHIFT - 1) {
663                 pr_warn("Invalid DMAR haw\n");
664                 return -EINVAL;
665         }
666
667         pr_info("Host address width %d\n", dmar->width + 1);
668         ret = dmar_walk_dmar_table(dmar, &cb);
669         if (ret == 0 && drhd_count == 0)
670                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
671
672         return ret;
673 }
674
675 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
676                                  int cnt, struct pci_dev *dev)
677 {
678         int index;
679         struct device *tmp;
680
681         while (dev) {
682                 for_each_active_dev_scope(devices, cnt, index, tmp)
683                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
684                                 return 1;
685
686                 /* Check our parent */
687                 dev = dev->bus->self;
688         }
689
690         return 0;
691 }
692
693 struct dmar_drhd_unit *
694 dmar_find_matched_drhd_unit(struct pci_dev *dev)
695 {
696         struct dmar_drhd_unit *dmaru;
697         struct acpi_dmar_hardware_unit *drhd;
698
699         dev = pci_physfn(dev);
700
701         rcu_read_lock();
702         for_each_drhd_unit(dmaru) {
703                 drhd = container_of(dmaru->hdr,
704                                     struct acpi_dmar_hardware_unit,
705                                     header);
706
707                 if (dmaru->include_all &&
708                     drhd->segment == pci_domain_nr(dev->bus))
709                         goto out;
710
711                 if (dmar_pci_device_match(dmaru->devices,
712                                           dmaru->devices_cnt, dev))
713                         goto out;
714         }
715         dmaru = NULL;
716 out:
717         rcu_read_unlock();
718
719         return dmaru;
720 }
721
722 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
723                                               struct acpi_device *adev)
724 {
725         struct dmar_drhd_unit *dmaru;
726         struct acpi_dmar_hardware_unit *drhd;
727         struct acpi_dmar_device_scope *scope;
728         struct device *tmp;
729         int i;
730         struct acpi_dmar_pci_path *path;
731
732         for_each_drhd_unit(dmaru) {
733                 drhd = container_of(dmaru->hdr,
734                                     struct acpi_dmar_hardware_unit,
735                                     header);
736
737                 for (scope = (void *)(drhd + 1);
738                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
739                      scope = ((void *)scope) + scope->length) {
740                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
741                                 continue;
742                         if (scope->enumeration_id != device_number)
743                                 continue;
744
745                         path = (void *)(scope + 1);
746                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
747                                 dev_name(&adev->dev), dmaru->reg_base_addr,
748                                 scope->bus, path->device, path->function);
749                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
750                                 if (tmp == NULL) {
751                                         dmaru->devices[i].bus = scope->bus;
752                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
753                                                                             path->function);
754                                         rcu_assign_pointer(dmaru->devices[i].dev,
755                                                            get_device(&adev->dev));
756                                         return;
757                                 }
758                         BUG_ON(i >= dmaru->devices_cnt);
759                 }
760         }
761         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
762                 device_number, dev_name(&adev->dev));
763 }
764
765 static int __init dmar_acpi_dev_scope_init(void)
766 {
767         struct acpi_dmar_andd *andd;
768
769         if (dmar_tbl == NULL)
770                 return -ENODEV;
771
772         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
773              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
774              andd = ((void *)andd) + andd->header.length) {
775                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
776                         acpi_handle h;
777                         struct acpi_device *adev;
778
779                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
780                                                           andd->device_name,
781                                                           &h))) {
782                                 pr_err("Failed to find handle for ACPI object %s\n",
783                                        andd->device_name);
784                                 continue;
785                         }
786                         if (acpi_bus_get_device(h, &adev)) {
787                                 pr_err("Failed to get device for ACPI object %s\n",
788                                        andd->device_name);
789                                 continue;
790                         }
791                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
792                 }
793         }
794         return 0;
795 }
796
797 int __init dmar_dev_scope_init(void)
798 {
799         struct pci_dev *dev = NULL;
800         struct dmar_pci_notify_info *info;
801
802         if (dmar_dev_scope_status != 1)
803                 return dmar_dev_scope_status;
804
805         if (list_empty(&dmar_drhd_units)) {
806                 dmar_dev_scope_status = -ENODEV;
807         } else {
808                 dmar_dev_scope_status = 0;
809
810                 dmar_acpi_dev_scope_init();
811
812                 for_each_pci_dev(dev) {
813                         if (dev->is_virtfn)
814                                 continue;
815
816                         info = dmar_alloc_pci_notify_info(dev,
817                                         BUS_NOTIFY_ADD_DEVICE);
818                         if (!info) {
819                                 pci_dev_put(dev);
820                                 return dmar_dev_scope_status;
821                         } else {
822                                 dmar_pci_bus_add_dev(info);
823                                 dmar_free_pci_notify_info(info);
824                         }
825                 }
826         }
827
828         return dmar_dev_scope_status;
829 }
830
831 void __init dmar_register_bus_notifier(void)
832 {
833         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
834 }
835
836
837 int __init dmar_table_init(void)
838 {
839         static int dmar_table_initialized;
840         int ret;
841
842         if (dmar_table_initialized == 0) {
843                 ret = parse_dmar_table();
844                 if (ret < 0) {
845                         if (ret != -ENODEV)
846                                 pr_info("Parse DMAR table failure.\n");
847                 } else  if (list_empty(&dmar_drhd_units)) {
848                         pr_info("No DMAR devices found\n");
849                         ret = -ENODEV;
850                 }
851
852                 if (ret < 0)
853                         dmar_table_initialized = ret;
854                 else
855                         dmar_table_initialized = 1;
856         }
857
858         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
859 }
860
861 static void warn_invalid_dmar(u64 addr, const char *message)
862 {
863         pr_warn_once(FW_BUG
864                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
865                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
866                 addr, message,
867                 dmi_get_system_info(DMI_BIOS_VENDOR),
868                 dmi_get_system_info(DMI_BIOS_VERSION),
869                 dmi_get_system_info(DMI_PRODUCT_VERSION));
870         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
871 }
872
873 static int __ref
874 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
875 {
876         struct acpi_dmar_hardware_unit *drhd;
877         void __iomem *addr;
878         u64 cap, ecap;
879
880         drhd = (void *)entry;
881         if (!drhd->address) {
882                 warn_invalid_dmar(0, "");
883                 return -EINVAL;
884         }
885
886         if (arg)
887                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
888         else
889                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
890         if (!addr) {
891                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
892                 return -EINVAL;
893         }
894
895         cap = dmar_readq(addr + DMAR_CAP_REG);
896         ecap = dmar_readq(addr + DMAR_ECAP_REG);
897
898         if (arg)
899                 iounmap(addr);
900         else
901                 early_iounmap(addr, VTD_PAGE_SIZE);
902
903         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
904                 warn_invalid_dmar(drhd->address, " returns all ones");
905                 return -EINVAL;
906         }
907
908         return 0;
909 }
910
911 int __init detect_intel_iommu(void)
912 {
913         int ret;
914         struct dmar_res_callback validate_drhd_cb = {
915                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
916                 .ignore_unhandled = true,
917         };
918
919         down_write(&dmar_global_lock);
920         ret = dmar_table_detect();
921         if (!ret)
922                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
923                                            &validate_drhd_cb);
924         if (!ret && !no_iommu && !iommu_detected &&
925             (!dmar_disabled || dmar_platform_optin())) {
926                 iommu_detected = 1;
927                 /* Make sure ACS will be enabled */
928                 pci_request_acs();
929         }
930
931 #ifdef CONFIG_X86
932         if (!ret) {
933                 x86_init.iommu.iommu_init = intel_iommu_init;
934                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
935         }
936
937 #endif
938
939         if (dmar_tbl) {
940                 acpi_put_table(dmar_tbl);
941                 dmar_tbl = NULL;
942         }
943         up_write(&dmar_global_lock);
944
945         return ret ? ret : 1;
946 }
947
948 static void unmap_iommu(struct intel_iommu *iommu)
949 {
950         iounmap(iommu->reg);
951         release_mem_region(iommu->reg_phys, iommu->reg_size);
952 }
953
954 /**
955  * map_iommu: map the iommu's registers
956  * @iommu: the iommu to map
957  * @phys_addr: the physical address of the base resgister
958  *
959  * Memory map the iommu's registers.  Start w/ a single page, and
960  * possibly expand if that turns out to be insufficent.
961  */
962 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
963 {
964         int map_size, err=0;
965
966         iommu->reg_phys = phys_addr;
967         iommu->reg_size = VTD_PAGE_SIZE;
968
969         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
970                 pr_err("Can't reserve memory\n");
971                 err = -EBUSY;
972                 goto out;
973         }
974
975         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
976         if (!iommu->reg) {
977                 pr_err("Can't map the region\n");
978                 err = -ENOMEM;
979                 goto release;
980         }
981
982         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
983         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
984
985         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
986                 err = -EINVAL;
987                 warn_invalid_dmar(phys_addr, " returns all ones");
988                 goto unmap;
989         }
990         if (ecap_vcs(iommu->ecap))
991                 iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
992
993         /* the registers might be more than one page */
994         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
995                          cap_max_fault_reg_offset(iommu->cap));
996         map_size = VTD_PAGE_ALIGN(map_size);
997         if (map_size > iommu->reg_size) {
998                 iounmap(iommu->reg);
999                 release_mem_region(iommu->reg_phys, iommu->reg_size);
1000                 iommu->reg_size = map_size;
1001                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1002                                         iommu->name)) {
1003                         pr_err("Can't reserve memory\n");
1004                         err = -EBUSY;
1005                         goto out;
1006                 }
1007                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1008                 if (!iommu->reg) {
1009                         pr_err("Can't map the region\n");
1010                         err = -ENOMEM;
1011                         goto release;
1012                 }
1013         }
1014         err = 0;
1015         goto out;
1016
1017 unmap:
1018         iounmap(iommu->reg);
1019 release:
1020         release_mem_region(iommu->reg_phys, iommu->reg_size);
1021 out:
1022         return err;
1023 }
1024
1025 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1026 {
1027         iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1028                                             DMAR_UNITS_SUPPORTED);
1029         if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1030                 iommu->seq_id = -1;
1031         } else {
1032                 set_bit(iommu->seq_id, dmar_seq_ids);
1033                 sprintf(iommu->name, "dmar%d", iommu->seq_id);
1034         }
1035
1036         return iommu->seq_id;
1037 }
1038
1039 static void dmar_free_seq_id(struct intel_iommu *iommu)
1040 {
1041         if (iommu->seq_id >= 0) {
1042                 clear_bit(iommu->seq_id, dmar_seq_ids);
1043                 iommu->seq_id = -1;
1044         }
1045 }
1046
1047 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1048 {
1049         struct intel_iommu *iommu;
1050         u32 ver, sts;
1051         int agaw = -1;
1052         int msagaw = -1;
1053         int err;
1054
1055         if (!drhd->reg_base_addr) {
1056                 warn_invalid_dmar(0, "");
1057                 return -EINVAL;
1058         }
1059
1060         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1061         if (!iommu)
1062                 return -ENOMEM;
1063
1064         if (dmar_alloc_seq_id(iommu) < 0) {
1065                 pr_err("Failed to allocate seq_id\n");
1066                 err = -ENOSPC;
1067                 goto error;
1068         }
1069
1070         err = map_iommu(iommu, drhd->reg_base_addr);
1071         if (err) {
1072                 pr_err("Failed to map %s\n", iommu->name);
1073                 goto error_free_seq_id;
1074         }
1075
1076         err = -EINVAL;
1077         if (cap_sagaw(iommu->cap) == 0) {
1078                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1079                         iommu->name);
1080                 drhd->ignored = 1;
1081         }
1082
1083         if (!drhd->ignored) {
1084                 agaw = iommu_calculate_agaw(iommu);
1085                 if (agaw < 0) {
1086                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1087                                iommu->seq_id);
1088                         drhd->ignored = 1;
1089                 }
1090         }
1091         if (!drhd->ignored) {
1092                 msagaw = iommu_calculate_max_sagaw(iommu);
1093                 if (msagaw < 0) {
1094                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1095                                iommu->seq_id);
1096                         drhd->ignored = 1;
1097                         agaw = -1;
1098                 }
1099         }
1100         iommu->agaw = agaw;
1101         iommu->msagaw = msagaw;
1102         iommu->segment = drhd->segment;
1103
1104         iommu->node = NUMA_NO_NODE;
1105
1106         ver = readl(iommu->reg + DMAR_VER_REG);
1107         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1108                 iommu->name,
1109                 (unsigned long long)drhd->reg_base_addr,
1110                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1111                 (unsigned long long)iommu->cap,
1112                 (unsigned long long)iommu->ecap);
1113
1114         /* Reflect status in gcmd */
1115         sts = readl(iommu->reg + DMAR_GSTS_REG);
1116         if (sts & DMA_GSTS_IRES)
1117                 iommu->gcmd |= DMA_GCMD_IRE;
1118         if (sts & DMA_GSTS_TES)
1119                 iommu->gcmd |= DMA_GCMD_TE;
1120         if (sts & DMA_GSTS_QIES)
1121                 iommu->gcmd |= DMA_GCMD_QIE;
1122
1123         raw_spin_lock_init(&iommu->register_lock);
1124
1125         /*
1126          * This is only for hotplug; at boot time intel_iommu_enabled won't
1127          * be set yet. When intel_iommu_init() runs, it registers the units
1128          * present at boot time, then sets intel_iommu_enabled.
1129          */
1130         if (intel_iommu_enabled && !drhd->ignored) {
1131                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1132                                              intel_iommu_groups,
1133                                              "%s", iommu->name);
1134                 if (err)
1135                         goto err_unmap;
1136
1137                 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1138
1139                 err = iommu_device_register(&iommu->iommu);
1140                 if (err)
1141                         goto err_sysfs;
1142         }
1143
1144         drhd->iommu = iommu;
1145         iommu->drhd = drhd;
1146
1147         return 0;
1148
1149 err_sysfs:
1150         iommu_device_sysfs_remove(&iommu->iommu);
1151 err_unmap:
1152         unmap_iommu(iommu);
1153 error_free_seq_id:
1154         dmar_free_seq_id(iommu);
1155 error:
1156         kfree(iommu);
1157         return err;
1158 }
1159
1160 static void free_iommu(struct intel_iommu *iommu)
1161 {
1162         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1163                 iommu_device_unregister(&iommu->iommu);
1164                 iommu_device_sysfs_remove(&iommu->iommu);
1165         }
1166
1167         if (iommu->irq) {
1168                 if (iommu->pr_irq) {
1169                         free_irq(iommu->pr_irq, iommu);
1170                         dmar_free_hwirq(iommu->pr_irq);
1171                         iommu->pr_irq = 0;
1172                 }
1173                 free_irq(iommu->irq, iommu);
1174                 dmar_free_hwirq(iommu->irq);
1175                 iommu->irq = 0;
1176         }
1177
1178         if (iommu->qi) {
1179                 free_page((unsigned long)iommu->qi->desc);
1180                 kfree(iommu->qi->desc_status);
1181                 kfree(iommu->qi);
1182         }
1183
1184         if (iommu->reg)
1185                 unmap_iommu(iommu);
1186
1187         dmar_free_seq_id(iommu);
1188         kfree(iommu);
1189 }
1190
1191 /*
1192  * Reclaim all the submitted descriptors which have completed its work.
1193  */
1194 static inline void reclaim_free_desc(struct q_inval *qi)
1195 {
1196         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1197                qi->desc_status[qi->free_tail] == QI_ABORT) {
1198                 qi->desc_status[qi->free_tail] = QI_FREE;
1199                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1200                 qi->free_cnt++;
1201         }
1202 }
1203
1204 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1205 {
1206         u32 fault;
1207         int head, tail;
1208         struct q_inval *qi = iommu->qi;
1209         int shift = qi_shift(iommu);
1210
1211         if (qi->desc_status[wait_index] == QI_ABORT)
1212                 return -EAGAIN;
1213
1214         fault = readl(iommu->reg + DMAR_FSTS_REG);
1215
1216         /*
1217          * If IQE happens, the head points to the descriptor associated
1218          * with the error. No new descriptors are fetched until the IQE
1219          * is cleared.
1220          */
1221         if (fault & DMA_FSTS_IQE) {
1222                 head = readl(iommu->reg + DMAR_IQH_REG);
1223                 if ((head >> shift) == index) {
1224                         struct qi_desc *desc = qi->desc + head;
1225
1226                         /*
1227                          * desc->qw2 and desc->qw3 are either reserved or
1228                          * used by software as private data. We won't print
1229                          * out these two qw's for security consideration.
1230                          */
1231                         pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1232                                (unsigned long long)desc->qw0,
1233                                (unsigned long long)desc->qw1);
1234                         memcpy(desc, qi->desc + (wait_index << shift),
1235                                1 << shift);
1236                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1237                         return -EINVAL;
1238                 }
1239         }
1240
1241         /*
1242          * If ITE happens, all pending wait_desc commands are aborted.
1243          * No new descriptors are fetched until the ITE is cleared.
1244          */
1245         if (fault & DMA_FSTS_ITE) {
1246                 head = readl(iommu->reg + DMAR_IQH_REG);
1247                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1248                 head |= 1;
1249                 tail = readl(iommu->reg + DMAR_IQT_REG);
1250                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1251
1252                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1253
1254                 do {
1255                         if (qi->desc_status[head] == QI_IN_USE)
1256                                 qi->desc_status[head] = QI_ABORT;
1257                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1258                 } while (head != tail);
1259
1260                 if (qi->desc_status[wait_index] == QI_ABORT)
1261                         return -EAGAIN;
1262         }
1263
1264         if (fault & DMA_FSTS_ICE)
1265                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1266
1267         return 0;
1268 }
1269
1270 /*
1271  * Function to submit invalidation descriptors of all types to the queued
1272  * invalidation interface(QI). Multiple descriptors can be submitted at a
1273  * time, a wait descriptor will be appended to each submission to ensure
1274  * hardware has completed the invalidation before return. Wait descriptors
1275  * can be part of the submission but it will not be polled for completion.
1276  */
1277 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1278                    unsigned int count, unsigned long options)
1279 {
1280         struct q_inval *qi = iommu->qi;
1281         struct qi_desc wait_desc;
1282         int wait_index, index;
1283         unsigned long flags;
1284         int offset, shift;
1285         int rc, i;
1286
1287         if (!qi)
1288                 return 0;
1289
1290 restart:
1291         rc = 0;
1292
1293         raw_spin_lock_irqsave(&qi->q_lock, flags);
1294         /*
1295          * Check if we have enough empty slots in the queue to submit,
1296          * the calculation is based on:
1297          * # of desc + 1 wait desc + 1 space between head and tail
1298          */
1299         while (qi->free_cnt < count + 2) {
1300                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1301                 cpu_relax();
1302                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1303         }
1304
1305         index = qi->free_head;
1306         wait_index = (index + count) % QI_LENGTH;
1307         shift = qi_shift(iommu);
1308
1309         for (i = 0; i < count; i++) {
1310                 offset = ((index + i) % QI_LENGTH) << shift;
1311                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1312                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1313         }
1314         qi->desc_status[wait_index] = QI_IN_USE;
1315
1316         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1317                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1318         if (options & QI_OPT_WAIT_DRAIN)
1319                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1320         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1321         wait_desc.qw2 = 0;
1322         wait_desc.qw3 = 0;
1323
1324         offset = wait_index << shift;
1325         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1326
1327         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1328         qi->free_cnt -= count + 1;
1329
1330         /*
1331          * update the HW tail register indicating the presence of
1332          * new descriptors.
1333          */
1334         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1335
1336         while (qi->desc_status[wait_index] != QI_DONE) {
1337                 /*
1338                  * We will leave the interrupts disabled, to prevent interrupt
1339                  * context to queue another cmd while a cmd is already submitted
1340                  * and waiting for completion on this cpu. This is to avoid
1341                  * a deadlock where the interrupt context can wait indefinitely
1342                  * for free slots in the queue.
1343                  */
1344                 rc = qi_check_fault(iommu, index, wait_index);
1345                 if (rc)
1346                         break;
1347
1348                 raw_spin_unlock(&qi->q_lock);
1349                 cpu_relax();
1350                 raw_spin_lock(&qi->q_lock);
1351         }
1352
1353         for (i = 0; i < count; i++)
1354                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1355
1356         reclaim_free_desc(qi);
1357         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1358
1359         if (rc == -EAGAIN)
1360                 goto restart;
1361
1362         return rc;
1363 }
1364
1365 /*
1366  * Flush the global interrupt entry cache.
1367  */
1368 void qi_global_iec(struct intel_iommu *iommu)
1369 {
1370         struct qi_desc desc;
1371
1372         desc.qw0 = QI_IEC_TYPE;
1373         desc.qw1 = 0;
1374         desc.qw2 = 0;
1375         desc.qw3 = 0;
1376
1377         /* should never fail */
1378         qi_submit_sync(iommu, &desc, 1, 0);
1379 }
1380
1381 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1382                       u64 type)
1383 {
1384         struct qi_desc desc;
1385
1386         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1387                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1388         desc.qw1 = 0;
1389         desc.qw2 = 0;
1390         desc.qw3 = 0;
1391
1392         qi_submit_sync(iommu, &desc, 1, 0);
1393 }
1394
1395 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1396                     unsigned int size_order, u64 type)
1397 {
1398         u8 dw = 0, dr = 0;
1399
1400         struct qi_desc desc;
1401         int ih = 0;
1402
1403         if (cap_write_drain(iommu->cap))
1404                 dw = 1;
1405
1406         if (cap_read_drain(iommu->cap))
1407                 dr = 1;
1408
1409         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1410                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1411         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1412                 | QI_IOTLB_AM(size_order);
1413         desc.qw2 = 0;
1414         desc.qw3 = 0;
1415
1416         qi_submit_sync(iommu, &desc, 1, 0);
1417 }
1418
1419 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1420                         u16 qdep, u64 addr, unsigned mask)
1421 {
1422         struct qi_desc desc;
1423
1424         if (mask) {
1425                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1426                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1427         } else
1428                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1429
1430         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1431                 qdep = 0;
1432
1433         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1434                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1435         desc.qw2 = 0;
1436         desc.qw3 = 0;
1437
1438         qi_submit_sync(iommu, &desc, 1, 0);
1439 }
1440
1441 /* PASID-based IOTLB invalidation */
1442 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1443                      unsigned long npages, bool ih)
1444 {
1445         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1446
1447         /*
1448          * npages == -1 means a PASID-selective invalidation, otherwise,
1449          * a positive value for Page-selective-within-PASID invalidation.
1450          * 0 is not a valid input.
1451          */
1452         if (WARN_ON(!npages)) {
1453                 pr_err("Invalid input npages = %ld\n", npages);
1454                 return;
1455         }
1456
1457         if (npages == -1) {
1458                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1459                                 QI_EIOTLB_DID(did) |
1460                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1461                                 QI_EIOTLB_TYPE;
1462                 desc.qw1 = 0;
1463         } else {
1464                 int mask = ilog2(__roundup_pow_of_two(npages));
1465                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1466
1467                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1468                         addr = ALIGN_DOWN(addr, align);
1469
1470                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1471                                 QI_EIOTLB_DID(did) |
1472                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1473                                 QI_EIOTLB_TYPE;
1474                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1475                                 QI_EIOTLB_IH(ih) |
1476                                 QI_EIOTLB_AM(mask);
1477         }
1478
1479         qi_submit_sync(iommu, &desc, 1, 0);
1480 }
1481
1482 /* PASID-based device IOTLB Invalidate */
1483 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1484                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1485 {
1486         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1487         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1488
1489         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1490                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1491                 QI_DEV_IOTLB_PFSID(pfsid);
1492
1493         /*
1494          * If S bit is 0, we only flush a single page. If S bit is set,
1495          * The least significant zero bit indicates the invalidation address
1496          * range. VT-d spec 6.5.2.6.
1497          * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1498          * size order = 0 is PAGE_SIZE 4KB
1499          * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1500          * ECAP.
1501          */
1502         if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1503                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1504                                     addr, size_order);
1505
1506         /* Take page address */
1507         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1508
1509         if (size_order) {
1510                 /*
1511                  * Existing 0s in address below size_order may be the least
1512                  * significant bit, we must set them to 1s to avoid having
1513                  * smaller size than desired.
1514                  */
1515                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1516                                         VTD_PAGE_SHIFT);
1517                 /* Clear size_order bit to indicate size */
1518                 desc.qw1 &= ~mask;
1519                 /* Set the S bit to indicate flushing more than 1 page */
1520                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1521         }
1522
1523         qi_submit_sync(iommu, &desc, 1, 0);
1524 }
1525
1526 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1527                           u64 granu, u32 pasid)
1528 {
1529         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1530
1531         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1532                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1533         qi_submit_sync(iommu, &desc, 1, 0);
1534 }
1535
1536 /*
1537  * Disable Queued Invalidation interface.
1538  */
1539 void dmar_disable_qi(struct intel_iommu *iommu)
1540 {
1541         unsigned long flags;
1542         u32 sts;
1543         cycles_t start_time = get_cycles();
1544
1545         if (!ecap_qis(iommu->ecap))
1546                 return;
1547
1548         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1549
1550         sts =  readl(iommu->reg + DMAR_GSTS_REG);
1551         if (!(sts & DMA_GSTS_QIES))
1552                 goto end;
1553
1554         /*
1555          * Give a chance to HW to complete the pending invalidation requests.
1556          */
1557         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1558                 readl(iommu->reg + DMAR_IQH_REG)) &&
1559                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1560                 cpu_relax();
1561
1562         iommu->gcmd &= ~DMA_GCMD_QIE;
1563         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1564
1565         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1566                       !(sts & DMA_GSTS_QIES), sts);
1567 end:
1568         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1569 }
1570
1571 /*
1572  * Enable queued invalidation.
1573  */
1574 static void __dmar_enable_qi(struct intel_iommu *iommu)
1575 {
1576         u32 sts;
1577         unsigned long flags;
1578         struct q_inval *qi = iommu->qi;
1579         u64 val = virt_to_phys(qi->desc);
1580
1581         qi->free_head = qi->free_tail = 0;
1582         qi->free_cnt = QI_LENGTH;
1583
1584         /*
1585          * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1586          * is present.
1587          */
1588         if (ecap_smts(iommu->ecap))
1589                 val |= (1 << 11) | 1;
1590
1591         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1592
1593         /* write zero to the tail reg */
1594         writel(0, iommu->reg + DMAR_IQT_REG);
1595
1596         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1597
1598         iommu->gcmd |= DMA_GCMD_QIE;
1599         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1600
1601         /* Make sure hardware complete it */
1602         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1603
1604         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1605 }
1606
1607 /*
1608  * Enable Queued Invalidation interface. This is a must to support
1609  * interrupt-remapping. Also used by DMA-remapping, which replaces
1610  * register based IOTLB invalidation.
1611  */
1612 int dmar_enable_qi(struct intel_iommu *iommu)
1613 {
1614         struct q_inval *qi;
1615         struct page *desc_page;
1616
1617         if (!ecap_qis(iommu->ecap))
1618                 return -ENOENT;
1619
1620         /*
1621          * queued invalidation is already setup and enabled.
1622          */
1623         if (iommu->qi)
1624                 return 0;
1625
1626         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1627         if (!iommu->qi)
1628                 return -ENOMEM;
1629
1630         qi = iommu->qi;
1631
1632         /*
1633          * Need two pages to accommodate 256 descriptors of 256 bits each
1634          * if the remapping hardware supports scalable mode translation.
1635          */
1636         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1637                                      !!ecap_smts(iommu->ecap));
1638         if (!desc_page) {
1639                 kfree(qi);
1640                 iommu->qi = NULL;
1641                 return -ENOMEM;
1642         }
1643
1644         qi->desc = page_address(desc_page);
1645
1646         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1647         if (!qi->desc_status) {
1648                 free_page((unsigned long) qi->desc);
1649                 kfree(qi);
1650                 iommu->qi = NULL;
1651                 return -ENOMEM;
1652         }
1653
1654         raw_spin_lock_init(&qi->q_lock);
1655
1656         __dmar_enable_qi(iommu);
1657
1658         return 0;
1659 }
1660
1661 /* iommu interrupt handling. Most stuff are MSI-like. */
1662
1663 enum faulttype {
1664         DMA_REMAP,
1665         INTR_REMAP,
1666         UNKNOWN,
1667 };
1668
1669 static const char *dma_remap_fault_reasons[] =
1670 {
1671         "Software",
1672         "Present bit in root entry is clear",
1673         "Present bit in context entry is clear",
1674         "Invalid context entry",
1675         "Access beyond MGAW",
1676         "PTE Write access is not set",
1677         "PTE Read access is not set",
1678         "Next page table ptr is invalid",
1679         "Root table address invalid",
1680         "Context table ptr is invalid",
1681         "non-zero reserved fields in RTP",
1682         "non-zero reserved fields in CTP",
1683         "non-zero reserved fields in PTE",
1684         "PCE for translation request specifies blocking",
1685 };
1686
1687 static const char * const dma_remap_sm_fault_reasons[] = {
1688         "SM: Invalid Root Table Address",
1689         "SM: TTM 0 for request with PASID",
1690         "SM: TTM 0 for page group request",
1691         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1692         "SM: Error attempting to access Root Entry",
1693         "SM: Present bit in Root Entry is clear",
1694         "SM: Non-zero reserved field set in Root Entry",
1695         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1696         "SM: Error attempting to access Context Entry",
1697         "SM: Present bit in Context Entry is clear",
1698         "SM: Non-zero reserved field set in the Context Entry",
1699         "SM: Invalid Context Entry",
1700         "SM: DTE field in Context Entry is clear",
1701         "SM: PASID Enable field in Context Entry is clear",
1702         "SM: PASID is larger than the max in Context Entry",
1703         "SM: PRE field in Context-Entry is clear",
1704         "SM: RID_PASID field error in Context-Entry",
1705         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1706         "SM: Error attempting to access the PASID Directory Entry",
1707         "SM: Present bit in Directory Entry is clear",
1708         "SM: Non-zero reserved field set in PASID Directory Entry",
1709         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1710         "SM: Error attempting to access PASID Table Entry",
1711         "SM: Present bit in PASID Table Entry is clear",
1712         "SM: Non-zero reserved field set in PASID Table Entry",
1713         "SM: Invalid Scalable-Mode PASID Table Entry",
1714         "SM: ERE field is clear in PASID Table Entry",
1715         "SM: SRE field is clear in PASID Table Entry",
1716         "Unknown", "Unknown",/* 0x5E-0x5F */
1717         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1718         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1719         "SM: Error attempting to access first-level paging entry",
1720         "SM: Present bit in first-level paging entry is clear",
1721         "SM: Non-zero reserved field set in first-level paging entry",
1722         "SM: Error attempting to access FL-PML4 entry",
1723         "SM: First-level entry address beyond MGAW in Nested translation",
1724         "SM: Read permission error in FL-PML4 entry in Nested translation",
1725         "SM: Read permission error in first-level paging entry in Nested translation",
1726         "SM: Write permission error in first-level paging entry in Nested translation",
1727         "SM: Error attempting to access second-level paging entry",
1728         "SM: Read/Write permission error in second-level paging entry",
1729         "SM: Non-zero reserved field set in second-level paging entry",
1730         "SM: Invalid second-level page table pointer",
1731         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1732         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1733         "SM: Address in first-level translation is not canonical",
1734         "SM: U/S set 0 for first-level translation with user privilege",
1735         "SM: No execute permission for request with PASID and ER=1",
1736         "SM: Address beyond the DMA hardware max",
1737         "SM: Second-level entry address beyond the max",
1738         "SM: No write permission for Write/AtomicOp request",
1739         "SM: No read permission for Read/AtomicOp request",
1740         "SM: Invalid address-interrupt address",
1741         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1742         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1743 };
1744
1745 static const char *irq_remap_fault_reasons[] =
1746 {
1747         "Detected reserved fields in the decoded interrupt-remapped request",
1748         "Interrupt index exceeded the interrupt-remapping table size",
1749         "Present field in the IRTE entry is clear",
1750         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1751         "Detected reserved fields in the IRTE entry",
1752         "Blocked a compatibility format interrupt request",
1753         "Blocked an interrupt request due to source-id verification failure",
1754 };
1755
1756 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1757 {
1758         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1759                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1760                 *fault_type = INTR_REMAP;
1761                 return irq_remap_fault_reasons[fault_reason - 0x20];
1762         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1763                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1764                 *fault_type = DMA_REMAP;
1765                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1766         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1767                 *fault_type = DMA_REMAP;
1768                 return dma_remap_fault_reasons[fault_reason];
1769         } else {
1770                 *fault_type = UNKNOWN;
1771                 return "Unknown";
1772         }
1773 }
1774
1775
1776 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1777 {
1778         if (iommu->irq == irq)
1779                 return DMAR_FECTL_REG;
1780         else if (iommu->pr_irq == irq)
1781                 return DMAR_PECTL_REG;
1782         else
1783                 BUG();
1784 }
1785
1786 void dmar_msi_unmask(struct irq_data *data)
1787 {
1788         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1789         int reg = dmar_msi_reg(iommu, data->irq);
1790         unsigned long flag;
1791
1792         /* unmask it */
1793         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1794         writel(0, iommu->reg + reg);
1795         /* Read a reg to force flush the post write */
1796         readl(iommu->reg + reg);
1797         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1798 }
1799
1800 void dmar_msi_mask(struct irq_data *data)
1801 {
1802         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1803         int reg = dmar_msi_reg(iommu, data->irq);
1804         unsigned long flag;
1805
1806         /* mask it */
1807         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1808         writel(DMA_FECTL_IM, iommu->reg + reg);
1809         /* Read a reg to force flush the post write */
1810         readl(iommu->reg + reg);
1811         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1812 }
1813
1814 void dmar_msi_write(int irq, struct msi_msg *msg)
1815 {
1816         struct intel_iommu *iommu = irq_get_handler_data(irq);
1817         int reg = dmar_msi_reg(iommu, irq);
1818         unsigned long flag;
1819
1820         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1821         writel(msg->data, iommu->reg + reg + 4);
1822         writel(msg->address_lo, iommu->reg + reg + 8);
1823         writel(msg->address_hi, iommu->reg + reg + 12);
1824         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1825 }
1826
1827 void dmar_msi_read(int irq, struct msi_msg *msg)
1828 {
1829         struct intel_iommu *iommu = irq_get_handler_data(irq);
1830         int reg = dmar_msi_reg(iommu, irq);
1831         unsigned long flag;
1832
1833         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1834         msg->data = readl(iommu->reg + reg + 4);
1835         msg->address_lo = readl(iommu->reg + reg + 8);
1836         msg->address_hi = readl(iommu->reg + reg + 12);
1837         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1838 }
1839
1840 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1841                 u8 fault_reason, u32 pasid, u16 source_id,
1842                 unsigned long long addr)
1843 {
1844         const char *reason;
1845         int fault_type;
1846
1847         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1848
1849         if (fault_type == INTR_REMAP)
1850                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1851                         source_id >> 8, PCI_SLOT(source_id & 0xFF),
1852                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1853                         fault_reason, reason);
1854         else
1855                 pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1856                        type ? "DMA Read" : "DMA Write",
1857                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1858                        PCI_FUNC(source_id & 0xFF), pasid, addr,
1859                        fault_reason, reason);
1860         return 0;
1861 }
1862
1863 #define PRIMARY_FAULT_REG_LEN (16)
1864 irqreturn_t dmar_fault(int irq, void *dev_id)
1865 {
1866         struct intel_iommu *iommu = dev_id;
1867         int reg, fault_index;
1868         u32 fault_status;
1869         unsigned long flag;
1870         static DEFINE_RATELIMIT_STATE(rs,
1871                                       DEFAULT_RATELIMIT_INTERVAL,
1872                                       DEFAULT_RATELIMIT_BURST);
1873
1874         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1875         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1876         if (fault_status && __ratelimit(&rs))
1877                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1878
1879         /* TBD: ignore advanced fault log currently */
1880         if (!(fault_status & DMA_FSTS_PPF))
1881                 goto unlock_exit;
1882
1883         fault_index = dma_fsts_fault_record_index(fault_status);
1884         reg = cap_fault_reg_offset(iommu->cap);
1885         while (1) {
1886                 /* Disable printing, simply clear the fault when ratelimited */
1887                 bool ratelimited = !__ratelimit(&rs);
1888                 u8 fault_reason;
1889                 u16 source_id;
1890                 u64 guest_addr;
1891                 u32 pasid;
1892                 int type;
1893                 u32 data;
1894                 bool pasid_present;
1895
1896                 /* highest 32 bits */
1897                 data = readl(iommu->reg + reg +
1898                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1899                 if (!(data & DMA_FRCD_F))
1900                         break;
1901
1902                 if (!ratelimited) {
1903                         fault_reason = dma_frcd_fault_reason(data);
1904                         type = dma_frcd_type(data);
1905
1906                         pasid = dma_frcd_pasid_value(data);
1907                         data = readl(iommu->reg + reg +
1908                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
1909                         source_id = dma_frcd_source_id(data);
1910
1911                         pasid_present = dma_frcd_pasid_present(data);
1912                         guest_addr = dmar_readq(iommu->reg + reg +
1913                                         fault_index * PRIMARY_FAULT_REG_LEN);
1914                         guest_addr = dma_frcd_page_addr(guest_addr);
1915                 }
1916
1917                 /* clear the fault */
1918                 writel(DMA_FRCD_F, iommu->reg + reg +
1919                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1920
1921                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1922
1923                 if (!ratelimited)
1924                         /* Using pasid -1 if pasid is not present */
1925                         dmar_fault_do_one(iommu, type, fault_reason,
1926                                           pasid_present ? pasid : -1,
1927                                           source_id, guest_addr);
1928
1929                 fault_index++;
1930                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1931                         fault_index = 0;
1932                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1933         }
1934
1935         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1936                iommu->reg + DMAR_FSTS_REG);
1937
1938 unlock_exit:
1939         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1940         return IRQ_HANDLED;
1941 }
1942
1943 int dmar_set_interrupt(struct intel_iommu *iommu)
1944 {
1945         int irq, ret;
1946
1947         /*
1948          * Check if the fault interrupt is already initialized.
1949          */
1950         if (iommu->irq)
1951                 return 0;
1952
1953         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1954         if (irq > 0) {
1955                 iommu->irq = irq;
1956         } else {
1957                 pr_err("No free IRQ vectors\n");
1958                 return -EINVAL;
1959         }
1960
1961         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1962         if (ret)
1963                 pr_err("Can't request irq\n");
1964         return ret;
1965 }
1966
1967 int __init enable_drhd_fault_handling(void)
1968 {
1969         struct dmar_drhd_unit *drhd;
1970         struct intel_iommu *iommu;
1971
1972         /*
1973          * Enable fault control interrupt.
1974          */
1975         for_each_iommu(iommu, drhd) {
1976                 u32 fault_status;
1977                 int ret = dmar_set_interrupt(iommu);
1978
1979                 if (ret) {
1980                         pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1981                                (unsigned long long)drhd->reg_base_addr, ret);
1982                         return -1;
1983                 }
1984
1985                 /*
1986                  * Clear any previous faults.
1987                  */
1988                 dmar_fault(iommu->irq, iommu);
1989                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1990                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1991         }
1992
1993         return 0;
1994 }
1995
1996 /*
1997  * Re-enable Queued Invalidation interface.
1998  */
1999 int dmar_reenable_qi(struct intel_iommu *iommu)
2000 {
2001         if (!ecap_qis(iommu->ecap))
2002                 return -ENOENT;
2003
2004         if (!iommu->qi)
2005                 return -ENOENT;
2006
2007         /*
2008          * First disable queued invalidation.
2009          */
2010         dmar_disable_qi(iommu);
2011         /*
2012          * Then enable queued invalidation again. Since there is no pending
2013          * invalidation requests now, it's safe to re-enable queued
2014          * invalidation.
2015          */
2016         __dmar_enable_qi(iommu);
2017
2018         return 0;
2019 }
2020
2021 /*
2022  * Check interrupt remapping support in DMAR table description.
2023  */
2024 int __init dmar_ir_support(void)
2025 {
2026         struct acpi_table_dmar *dmar;
2027         dmar = (struct acpi_table_dmar *)dmar_tbl;
2028         if (!dmar)
2029                 return 0;
2030         return dmar->flags & 0x1;
2031 }
2032
2033 /* Check whether DMAR units are in use */
2034 static inline bool dmar_in_use(void)
2035 {
2036         return irq_remapping_enabled || intel_iommu_enabled;
2037 }
2038
2039 static int __init dmar_free_unused_resources(void)
2040 {
2041         struct dmar_drhd_unit *dmaru, *dmaru_n;
2042
2043         if (dmar_in_use())
2044                 return 0;
2045
2046         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2047                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2048
2049         down_write(&dmar_global_lock);
2050         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2051                 list_del(&dmaru->list);
2052                 dmar_free_drhd(dmaru);
2053         }
2054         up_write(&dmar_global_lock);
2055
2056         return 0;
2057 }
2058
2059 late_initcall(dmar_free_unused_resources);
2060 IOMMU_INIT_POST(detect_intel_iommu);
2061
2062 /*
2063  * DMAR Hotplug Support
2064  * For more details, please refer to Intel(R) Virtualization Technology
2065  * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8
2066  * "Remapping Hardware Unit Hot Plug".
2067  */
2068 static guid_t dmar_hp_guid =
2069         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2070                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2071
2072 /*
2073  * Currently there's only one revision and BIOS will not check the revision id,
2074  * so use 0 for safety.
2075  */
2076 #define DMAR_DSM_REV_ID                 0
2077 #define DMAR_DSM_FUNC_DRHD              1
2078 #define DMAR_DSM_FUNC_ATSR              2
2079 #define DMAR_DSM_FUNC_RHSA              3
2080
2081 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2082 {
2083         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2084 }
2085
2086 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2087                                   dmar_res_handler_t handler, void *arg)
2088 {
2089         int ret = -ENODEV;
2090         union acpi_object *obj;
2091         struct acpi_dmar_header *start;
2092         struct dmar_res_callback callback;
2093         static int res_type[] = {
2094                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2095                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2096                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2097         };
2098
2099         if (!dmar_detect_dsm(handle, func))
2100                 return 0;
2101
2102         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2103                                       func, NULL, ACPI_TYPE_BUFFER);
2104         if (!obj)
2105                 return -ENODEV;
2106
2107         memset(&callback, 0, sizeof(callback));
2108         callback.cb[res_type[func]] = handler;
2109         callback.arg[res_type[func]] = arg;
2110         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2111         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2112
2113         ACPI_FREE(obj);
2114
2115         return ret;
2116 }
2117
2118 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2119 {
2120         int ret;
2121         struct dmar_drhd_unit *dmaru;
2122
2123         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2124         if (!dmaru)
2125                 return -ENODEV;
2126
2127         ret = dmar_ir_hotplug(dmaru, true);
2128         if (ret == 0)
2129                 ret = dmar_iommu_hotplug(dmaru, true);
2130
2131         return ret;
2132 }
2133
2134 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2135 {
2136         int i, ret;
2137         struct device *dev;
2138         struct dmar_drhd_unit *dmaru;
2139
2140         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2141         if (!dmaru)
2142                 return 0;
2143
2144         /*
2145          * All PCI devices managed by this unit should have been destroyed.
2146          */
2147         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2148                 for_each_active_dev_scope(dmaru->devices,
2149                                           dmaru->devices_cnt, i, dev)
2150                         return -EBUSY;
2151         }
2152
2153         ret = dmar_ir_hotplug(dmaru, false);
2154         if (ret == 0)
2155                 ret = dmar_iommu_hotplug(dmaru, false);
2156
2157         return ret;
2158 }
2159
2160 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2161 {
2162         struct dmar_drhd_unit *dmaru;
2163
2164         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2165         if (dmaru) {
2166                 list_del_rcu(&dmaru->list);
2167                 synchronize_rcu();
2168                 dmar_free_drhd(dmaru);
2169         }
2170
2171         return 0;
2172 }
2173
2174 static int dmar_hotplug_insert(acpi_handle handle)
2175 {
2176         int ret;
2177         int drhd_count = 0;
2178
2179         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2180                                      &dmar_validate_one_drhd, (void *)1);
2181         if (ret)
2182                 goto out;
2183
2184         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2185                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2186         if (ret == 0 && drhd_count == 0) {
2187                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2188                 goto out;
2189         } else if (ret) {
2190                 goto release_drhd;
2191         }
2192
2193         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2194                                      &dmar_parse_one_rhsa, NULL);
2195         if (ret)
2196                 goto release_drhd;
2197
2198         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2199                                      &dmar_parse_one_atsr, NULL);
2200         if (ret)
2201                 goto release_atsr;
2202
2203         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2204                                      &dmar_hp_add_drhd, NULL);
2205         if (!ret)
2206                 return 0;
2207
2208         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2209                                &dmar_hp_remove_drhd, NULL);
2210 release_atsr:
2211         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2212                                &dmar_release_one_atsr, NULL);
2213 release_drhd:
2214         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2215                                &dmar_hp_release_drhd, NULL);
2216 out:
2217         return ret;
2218 }
2219
2220 static int dmar_hotplug_remove(acpi_handle handle)
2221 {
2222         int ret;
2223
2224         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2225                                      &dmar_check_one_atsr, NULL);
2226         if (ret)
2227                 return ret;
2228
2229         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2230                                      &dmar_hp_remove_drhd, NULL);
2231         if (ret == 0) {
2232                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2233                                                &dmar_release_one_atsr, NULL));
2234                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2235                                                &dmar_hp_release_drhd, NULL));
2236         } else {
2237                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2238                                        &dmar_hp_add_drhd, NULL);
2239         }
2240
2241         return ret;
2242 }
2243
2244 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2245                                        void *context, void **retval)
2246 {
2247         acpi_handle *phdl = retval;
2248
2249         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2250                 *phdl = handle;
2251                 return AE_CTRL_TERMINATE;
2252         }
2253
2254         return AE_OK;
2255 }
2256
2257 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2258 {
2259         int ret;
2260         acpi_handle tmp = NULL;
2261         acpi_status status;
2262
2263         if (!dmar_in_use())
2264                 return 0;
2265
2266         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2267                 tmp = handle;
2268         } else {
2269                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2270                                              ACPI_UINT32_MAX,
2271                                              dmar_get_dsm_handle,
2272                                              NULL, NULL, &tmp);
2273                 if (ACPI_FAILURE(status)) {
2274                         pr_warn("Failed to locate _DSM method.\n");
2275                         return -ENXIO;
2276                 }
2277         }
2278         if (tmp == NULL)
2279                 return 0;
2280
2281         down_write(&dmar_global_lock);
2282         if (insert)
2283                 ret = dmar_hotplug_insert(tmp);
2284         else
2285                 ret = dmar_hotplug_remove(tmp);
2286         up_write(&dmar_global_lock);
2287
2288         return ret;
2289 }
2290
2291 int dmar_device_add(acpi_handle handle)
2292 {
2293         return dmar_device_hotplug(handle, true);
2294 }
2295
2296 int dmar_device_remove(acpi_handle handle)
2297 {
2298         return dmar_device_hotplug(handle, false);
2299 }
2300
2301 /*
2302  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2303  *
2304  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2305  * the ACPI DMAR table. This means that the platform boot firmware has made
2306  * sure no device can issue DMA outside of RMRR regions.
2307  */
2308 bool dmar_platform_optin(void)
2309 {
2310         struct acpi_table_dmar *dmar;
2311         acpi_status status;
2312         bool ret;
2313
2314         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2315                                 (struct acpi_table_header **)&dmar);
2316         if (ACPI_FAILURE(status))
2317                 return false;
2318
2319         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2320         acpi_put_table((struct acpi_table_header *)dmar);
2321
2322         return ret;
2323 }
2324 EXPORT_SYMBOL_GPL(dmar_platform_optin);