GNU Linux-libre 5.4.241-gnu1
drivers/iommu/dmar.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34
35 #include "irq_remapping.h"
36
37 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38 struct dmar_res_callback {
39         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
40         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
41         bool                    ignore_unhandled;
42         bool                    print_entry;
43 };
44
45 /*
46  * Assumptions:
47  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
48  *    before the IO devices managed by that unit.
49  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
50  *    after the IO devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
57 DECLARE_RWSEM(dmar_global_lock);
58 LIST_HEAD(dmar_drhd_units);
59
60 struct acpi_table_header * __initdata dmar_tbl;
61 static int dmar_dev_scope_status = 1;
62 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66
67 extern const struct iommu_ops intel_iommu_ops;
68
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71         /*
72          * Add the INCLUDE_ALL unit at the tail, so a scan of the list
73          * will find it at the very end.
74          */
75         if (drhd->include_all)
76                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77         else
78                 list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80
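/*
 * Count the NAMESPACE, ENDPOINT and BRIDGE device scope entries between
 * @start and @end and allocate an array of that many struct dmar_dev_scope.
 * Returns NULL if no such entry is present or the allocation fails; *cnt
 * holds the number of entries counted.
 */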
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83         struct acpi_dmar_device_scope *scope;
84
85         *cnt = 0;
86         while (start < end) {
87                 scope = start;
88                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91                         (*cnt)++;
92                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94                         pr_warn("Unsupported device scope\n");
95                 }
96                 start += scope->length;
97         }
98         if (*cnt == 0)
99                 return NULL;
100
101         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106         int i;
107         struct device *tmp_dev;
108
109         if (*devices && *cnt) {
110                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111                         put_device(tmp_dev);
112                 kfree(*devices);
113         }
114
115         *devices = NULL;
116         *cnt = 0;
117 }
118
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121
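/*
 * Build a dmar_pci_notify_info for a PCI bus notification.  For device
 * addition events the PCI path from the device up to its root bus is
 * recorded.  A small static buffer is used when the path fits, so the
 * common case avoids kzalloc()/kfree().
 */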
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125         int level = 0;
126         size_t size;
127         struct pci_dev *tmp;
128         struct dmar_pci_notify_info *info;
129
130         BUG_ON(dev->is_virtfn);
131
132         /*
133          * Ignore devices that have a domain number higher than what can
134          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135          */
136         if (pci_domain_nr(dev->bus) > U16_MAX)
137                 return NULL;
138
139         /* Only generate path[] for device addition event */
140         if (event == BUS_NOTIFY_ADD_DEVICE)
141                 for (tmp = dev; tmp; tmp = tmp->bus->self)
142                         level++;
143
144         size = struct_size(info, path, level);
145         if (size <= sizeof(dmar_pci_notify_info_buf)) {
146                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147         } else {
148                 info = kzalloc(size, GFP_KERNEL);
149                 if (!info) {
150                         pr_warn("Out of memory when allocating notify_info "
151                                 "for %s.\n", pci_name(dev));
152                         if (dmar_dev_scope_status == 0)
153                                 dmar_dev_scope_status = -ENOMEM;
154                         return NULL;
155                 }
156         }
157
158         info->event = event;
159         info->dev = dev;
160         info->seg = pci_domain_nr(dev->bus);
161         info->level = level;
162         if (event == BUS_NOTIFY_ADD_DEVICE) {
163                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
164                         level--;
165                         info->path[level].bus = tmp->bus->number;
166                         info->path[level].device = PCI_SLOT(tmp->devfn);
167                         info->path[level].function = PCI_FUNC(tmp->devfn);
168                         if (pci_is_root_bus(tmp->bus))
169                                 info->bus = tmp->bus->number;
170                 }
171         }
172
173         return info;
174 }
175
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178         if ((void *)info != dmar_pci_notify_info_buf)
179                 kfree(info);
180 }
181
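/*
 * Check whether the PCI path recorded in @info matches the (bus, path)
 * device scope from the DMAR table.  As a workaround for broken RMRR
 * entries, a single-element path is also accepted when it matches the
 * last hop of the device's path.
 */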
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183                                 struct acpi_dmar_pci_path *path, int count)
184 {
185         int i;
186
187         if (info->bus != bus)
188                 goto fallback;
189         if (info->level != count)
190                 goto fallback;
191
192         for (i = 0; i < count; i++) {
193                 if (path[i].device != info->path[i].device ||
194                     path[i].function != info->path[i].function)
195                         goto fallback;
196         }
197
198         return true;
199
200 fallback:
201
202         if (count != 1)
203                 return false;
204
205         i = info->level - 1;
206         if (bus              == info->path[i].bus &&
207             path[0].device   == info->path[i].device &&
208             path[0].function == info->path[i].function) {
209                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210                         bus, path[0].device, path[0].function);
211                 return true;
212         }
213
214         return false;
215 }
216
217 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219                           void *start, void*end, u16 segment,
220                           struct dmar_dev_scope *devices,
221                           int devices_cnt)
222 {
223         int i, level;
224         struct device *tmp, *dev = &info->dev->dev;
225         struct acpi_dmar_device_scope *scope;
226         struct acpi_dmar_pci_path *path;
227
228         if (segment != info->seg)
229                 return 0;
230
231         for (; start < end; start += scope->length) {
232                 scope = start;
233                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235                         continue;
236
237                 path = (struct acpi_dmar_pci_path *)(scope + 1);
238                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
239                 if (!dmar_match_pci_path(info, scope->bus, path, level))
240                         continue;
241
242                 /*
243                  * We expect devices with endpoint scope to have normal PCI
244                  * headers, and devices with bridge scope to have bridge PCI
245                  * headers.  However PCI NTB devices may be listed in the
246                  * DMAR table with bridge scope, even though they have a
247                  * normal PCI header.  NTB devices are identified by class
248  * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
249                  * for this special case.
250                  */
251                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255                       info->dev->class >> 8 != PCI_CLASS_BRIDGE_OTHER))) {
256                         pr_warn("Device scope type does not match for %s\n",
257                                 pci_name(info->dev));
258                         return -EINVAL;
259                 }
260
261                 for_each_dev_scope(devices, devices_cnt, i, tmp)
262                         if (tmp == NULL) {
263                                 devices[i].bus = info->dev->bus->number;
264                                 devices[i].devfn = info->dev->devfn;
265                                 rcu_assign_pointer(devices[i].dev,
266                                                    get_device(dev));
267                                 return 1;
268                         }
269                 BUG_ON(i >= devices_cnt);
270         }
271
272         return 0;
273 }
274
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276                           struct dmar_dev_scope *devices, int count)
277 {
278         int index;
279         struct device *tmp;
280
281         if (info->seg != segment)
282                 return 0;
283
284         for_each_active_dev_scope(devices, count, index, tmp)
285                 if (tmp == &info->dev->dev) {
286                         RCU_INIT_POINTER(devices[index].dev, NULL);
287                         synchronize_rcu();
288                         put_device(tmp);
289                         return 1;
290                 }
291
292         return 0;
293 }
294
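/*
 * Add a newly discovered PCI device to the device scope of the matching
 * DRHD unit (INCLUDE_ALL units are skipped) and notify the DMAR scope
 * handling code.  Any failure is latched in dmar_dev_scope_status.
 */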
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297         int ret = 0;
298         struct dmar_drhd_unit *dmaru;
299         struct acpi_dmar_hardware_unit *drhd;
300
301         for_each_drhd_unit(dmaru) {
302                 if (dmaru->include_all)
303                         continue;
304
305                 drhd = container_of(dmaru->hdr,
306                                     struct acpi_dmar_hardware_unit, header);
307                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308                                 ((void *)drhd) + drhd->header.length,
309                                 dmaru->segment,
310                                 dmaru->devices, dmaru->devices_cnt);
311                 if (ret)
312                         break;
313         }
314         if (ret >= 0)
315                 ret = dmar_iommu_notify_scope_dev(info);
316         if (ret < 0 && dmar_dev_scope_status == 0)
317                 dmar_dev_scope_status = ret;
318
319         return ret;
320 }
321
322 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
323 {
324         struct dmar_drhd_unit *dmaru;
325
326         for_each_drhd_unit(dmaru)
327                 if (dmar_remove_dev_scope(info, dmaru->segment,
328                         dmaru->devices, dmaru->devices_cnt))
329                         break;
330         dmar_iommu_notify_scope_dev(info);
331 }
332
333 static int dmar_pci_bus_notifier(struct notifier_block *nb,
334                                  unsigned long action, void *data)
335 {
336         struct pci_dev *pdev = to_pci_dev(data);
337         struct dmar_pci_notify_info *info;
338
339         /* Only care about add/remove events for physical functions.
340          * For VFs we actually do the lookup based on the corresponding
341          * PF in device_to_iommu() anyway. */
342         if (pdev->is_virtfn)
343                 return NOTIFY_DONE;
344         if (action != BUS_NOTIFY_ADD_DEVICE &&
345             action != BUS_NOTIFY_REMOVED_DEVICE)
346                 return NOTIFY_DONE;
347
348         info = dmar_alloc_pci_notify_info(pdev, action);
349         if (!info)
350                 return NOTIFY_DONE;
351
352         down_write(&dmar_global_lock);
353         if (action == BUS_NOTIFY_ADD_DEVICE)
354                 dmar_pci_bus_add_dev(info);
355         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
356                 dmar_pci_bus_del_dev(info);
357         up_write(&dmar_global_lock);
358
359         dmar_free_pci_notify_info(info);
360
361         return NOTIFY_OK;
362 }
363
364 static struct notifier_block dmar_pci_bus_nb = {
365         .notifier_call = dmar_pci_bus_notifier,
366         .priority = 1,
367 };
368
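/*
 * Look up an already registered DRHD unit by segment number and register
 * base address, so that duplicate DRHD entries are not parsed twice.
 */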
369 static struct dmar_drhd_unit *
370 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
371 {
372         struct dmar_drhd_unit *dmaru;
373
374         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
375                                 dmar_rcu_check())
376                 if (dmaru->segment == drhd->segment &&
377                     dmaru->reg_base_addr == drhd->address)
378                         return dmaru;
379
380         return NULL;
381 }
382
383 /**
384  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
385  * structure which uniquely represents one DMA remapping hardware unit
386  * present in the platform
387  */
388 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
389 {
390         struct acpi_dmar_hardware_unit *drhd;
391         struct dmar_drhd_unit *dmaru;
392         int ret;
393
394         drhd = (struct acpi_dmar_hardware_unit *)header;
395         dmaru = dmar_find_dmaru(drhd);
396         if (dmaru)
397                 goto out;
398
399         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
400         if (!dmaru)
401                 return -ENOMEM;
402
403         /*
404          * If header is allocated from slab by ACPI _DSM method, we need to
405          * copy the content because the memory buffer will be freed on return.
406          */
407         dmaru->hdr = (void *)(dmaru + 1);
408         memcpy(dmaru->hdr, header, header->length);
409         dmaru->reg_base_addr = drhd->address;
410         dmaru->segment = drhd->segment;
411         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
412         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
413                                               ((void *)drhd) + drhd->header.length,
414                                               &dmaru->devices_cnt);
415         if (dmaru->devices_cnt && dmaru->devices == NULL) {
416                 kfree(dmaru);
417                 return -ENOMEM;
418         }
419
420         ret = alloc_iommu(dmaru);
421         if (ret) {
422                 dmar_free_dev_scope(&dmaru->devices,
423                                     &dmaru->devices_cnt);
424                 kfree(dmaru);
425                 return ret;
426         }
427         dmar_register_drhd_unit(dmaru);
428
429 out:
430         if (arg)
431                 (*(int *)arg)++;
432
433         return 0;
434 }
435
436 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
437 {
438         if (dmaru->devices && dmaru->devices_cnt)
439                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
440         if (dmaru->iommu)
441                 free_iommu(dmaru->iommu);
442         kfree(dmaru);
443 }
444
445 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
446                                       void *arg)
447 {
448         struct acpi_dmar_andd *andd = (void *)header;
449
450         /* Check for NUL termination within the designated length */
451         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
452                 pr_warn(FW_BUG
453                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
454                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
455                            dmi_get_system_info(DMI_BIOS_VENDOR),
456                            dmi_get_system_info(DMI_BIOS_VERSION),
457                            dmi_get_system_info(DMI_PRODUCT_VERSION));
458                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
459                 return -EINVAL;
460         }
461         pr_info("ANDD device: %x name: %s\n", andd->device_number,
462                 andd->device_name);
463
464         return 0;
465 }
466
467 #ifdef CONFIG_ACPI_NUMA
468 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
469 {
470         struct acpi_dmar_rhsa *rhsa;
471         struct dmar_drhd_unit *drhd;
472
473         rhsa = (struct acpi_dmar_rhsa *)header;
474         for_each_drhd_unit(drhd) {
475                 if (drhd->reg_base_addr == rhsa->base_address) {
476                         int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
477
478                         if (node != NUMA_NO_NODE && !node_online(node))
479                                 node = NUMA_NO_NODE;
480                         drhd->iommu->node = node;
481                         return 0;
482                 }
483         }
484         pr_warn(FW_BUG
485                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
486                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
487                 rhsa->base_address,
488                 dmi_get_system_info(DMI_BIOS_VENDOR),
489                 dmi_get_system_info(DMI_BIOS_VERSION),
490                 dmi_get_system_info(DMI_PRODUCT_VERSION));
491         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
492
493         return 0;
494 }
495 #else
496 #define dmar_parse_one_rhsa             dmar_res_noop
497 #endif
498
499 static void
500 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
501 {
502         struct acpi_dmar_hardware_unit *drhd;
503         struct acpi_dmar_reserved_memory *rmrr;
504         struct acpi_dmar_atsr *atsr;
505         struct acpi_dmar_rhsa *rhsa;
506
507         switch (header->type) {
508         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
509                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
510                                     header);
511                 pr_info("DRHD base: %#016Lx flags: %#x\n",
512                         (unsigned long long)drhd->address, drhd->flags);
513                 break;
514         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
515                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
516                                     header);
517                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
518                         (unsigned long long)rmrr->base_address,
519                         (unsigned long long)rmrr->end_address);
520                 break;
521         case ACPI_DMAR_TYPE_ROOT_ATS:
522                 atsr = container_of(header, struct acpi_dmar_atsr, header);
523                 pr_info("ATSR flags: %#x\n", atsr->flags);
524                 break;
525         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
526                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
527                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
528                        (unsigned long long)rhsa->base_address,
529                        rhsa->proximity_domain);
530                 break;
531         case ACPI_DMAR_TYPE_NAMESPACE:
532                 /* We don't print this here because we need to sanity-check
533                    it first. So print it in dmar_parse_one_andd() instead. */
534                 break;
535         }
536 }
537
538 /**
539  * dmar_table_detect - checks to see if the platform supports DMAR devices
540  */
541 static int __init dmar_table_detect(void)
542 {
543         acpi_status status = AE_OK;
544
545         /* if we can find the DMAR table, then there are DMAR devices */
546         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
547
548         if (ACPI_SUCCESS(status) && !dmar_tbl) {
549                 pr_warn("Unable to map DMAR\n");
550                 status = AE_NOT_FOUND;
551         }
552
553         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
554 }
555
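/*
 * Walk the remapping structures between @start and @start + @len and
 * dispatch each one to the callback registered for its type in @cb.
 * Zero-length records terminate the walk and unknown types are skipped
 * for forward compatibility.
 */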
556 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
557                                        size_t len, struct dmar_res_callback *cb)
558 {
559         struct acpi_dmar_header *iter, *next;
560         struct acpi_dmar_header *end = ((void *)start) + len;
561
562         for (iter = start; iter < end; iter = next) {
563                 next = (void *)iter + iter->length;
564                 if (iter->length == 0) {
565                         /* Avoid looping forever on bad ACPI tables */
566                         pr_debug(FW_BUG "Invalid 0-length structure\n");
567                         break;
568                 } else if (next > end) {
569                         /* Avoid passing table end */
570                         pr_warn(FW_BUG "Record passes table end\n");
571                         return -EINVAL;
572                 }
573
574                 if (cb->print_entry)
575                         dmar_table_print_dmar_entry(iter);
576
577                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
578                         /* continue for forward compatibility */
579                         pr_debug("Unknown DMAR structure type %d\n",
580                                  iter->type);
581                 } else if (cb->cb[iter->type]) {
582                         int ret;
583
584                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
585                         if (ret)
586                                 return ret;
587                 } else if (!cb->ignore_unhandled) {
588                         pr_warn("No handler for DMAR structure type %d\n",
589                                 iter->type);
590                         return -EINVAL;
591                 }
592         }
593
594         return 0;
595 }
596
597 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
598                                        struct dmar_res_callback *cb)
599 {
600         return dmar_walk_remapping_entries((void *)(dmar + 1),
601                         dmar->header.length - sizeof(*dmar), cb);
602 }
603
604 /**
605  * parse_dmar_table - parses the DMA reporting table
606  */
607 static int __init
608 parse_dmar_table(void)
609 {
610         struct acpi_table_dmar *dmar;
611         int drhd_count = 0;
612         int ret;
613         struct dmar_res_callback cb = {
614                 .print_entry = true,
615                 .ignore_unhandled = true,
616                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
617                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
618                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
619                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
620                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
621                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
622         };
623
624         /*
625          * Do it again, since the earlier dmar_tbl mapping could have been
626          * done with a fixed map.
627          */
628         dmar_table_detect();
629
630         /*
631          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
632          * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
633          */
634         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
635
636         dmar = (struct acpi_table_dmar *)dmar_tbl;
637         if (!dmar)
638                 return -ENODEV;
639
640         if (dmar->width < PAGE_SHIFT - 1) {
641                 pr_warn("Invalid DMAR haw\n");
642                 return -EINVAL;
643         }
644
645         pr_info("Host address width %d\n", dmar->width + 1);
646         ret = dmar_walk_dmar_table(dmar, &cb);
647         if (ret == 0 && drhd_count == 0)
648                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
649
650         return ret;
651 }
652
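/*
 * Return 1 if @dev, or any bridge on the path to its root bus, appears in
 * the devices[] scope array; 0 otherwise.
 */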
653 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
654                                  int cnt, struct pci_dev *dev)
655 {
656         int index;
657         struct device *tmp;
658
659         while (dev) {
660                 for_each_active_dev_scope(devices, cnt, index, tmp)
661                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
662                                 return 1;
663
664                 /* Check our parent */
665                 dev = dev->bus->self;
666         }
667
668         return 0;
669 }
670
671 struct dmar_drhd_unit *
672 dmar_find_matched_drhd_unit(struct pci_dev *dev)
673 {
674         struct dmar_drhd_unit *dmaru;
675         struct acpi_dmar_hardware_unit *drhd;
676
677         dev = pci_physfn(dev);
678
679         rcu_read_lock();
680         for_each_drhd_unit(dmaru) {
681                 drhd = container_of(dmaru->hdr,
682                                     struct acpi_dmar_hardware_unit,
683                                     header);
684
685                 if (dmaru->include_all &&
686                     drhd->segment == pci_domain_nr(dev->bus))
687                         goto out;
688
689                 if (dmar_pci_device_match(dmaru->devices,
690                                           dmaru->devices_cnt, dev))
691                         goto out;
692         }
693         dmaru = NULL;
694 out:
695         rcu_read_unlock();
696
697         return dmaru;
698 }
699
700 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
701                                               struct acpi_device *adev)
702 {
703         struct dmar_drhd_unit *dmaru;
704         struct acpi_dmar_hardware_unit *drhd;
705         struct acpi_dmar_device_scope *scope;
706         struct device *tmp;
707         int i;
708         struct acpi_dmar_pci_path *path;
709
710         for_each_drhd_unit(dmaru) {
711                 drhd = container_of(dmaru->hdr,
712                                     struct acpi_dmar_hardware_unit,
713                                     header);
714
715                 for (scope = (void *)(drhd + 1);
716                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
717                      scope = ((void *)scope) + scope->length) {
718                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
719                                 continue;
720                         if (scope->enumeration_id != device_number)
721                                 continue;
722
723                         path = (void *)(scope + 1);
724                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
725                                 dev_name(&adev->dev), dmaru->reg_base_addr,
726                                 scope->bus, path->device, path->function);
727                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
728                                 if (tmp == NULL) {
729                                         dmaru->devices[i].bus = scope->bus;
730                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
731                                                                             path->function);
732                                         rcu_assign_pointer(dmaru->devices[i].dev,
733                                                            get_device(&adev->dev));
734                                         return;
735                                 }
736                         BUG_ON(i >= dmaru->devices_cnt);
737                 }
738         }
739         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
740                 device_number, dev_name(&adev->dev));
741 }
742
743 static int __init dmar_acpi_dev_scope_init(void)
744 {
745         struct acpi_dmar_andd *andd;
746
747         if (dmar_tbl == NULL)
748                 return -ENODEV;
749
750         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
751              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
752              andd = ((void *)andd) + andd->header.length) {
753                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
754                         acpi_handle h;
755                         struct acpi_device *adev;
756
757                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
758                                                           andd->device_name,
759                                                           &h))) {
760                                 pr_err("Failed to find handle for ACPI object %s\n",
761                                        andd->device_name);
762                                 continue;
763                         }
764                         if (acpi_bus_get_device(h, &adev)) {
765                                 pr_err("Failed to get device for ACPI object %s\n",
766                                        andd->device_name);
767                                 continue;
768                         }
769                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
770                 }
771         }
772         return 0;
773 }
774
775 int __init dmar_dev_scope_init(void)
776 {
777         struct pci_dev *dev = NULL;
778         struct dmar_pci_notify_info *info;
779
780         if (dmar_dev_scope_status != 1)
781                 return dmar_dev_scope_status;
782
783         if (list_empty(&dmar_drhd_units)) {
784                 dmar_dev_scope_status = -ENODEV;
785         } else {
786                 dmar_dev_scope_status = 0;
787
788                 dmar_acpi_dev_scope_init();
789
790                 for_each_pci_dev(dev) {
791                         if (dev->is_virtfn)
792                                 continue;
793
794                         info = dmar_alloc_pci_notify_info(dev,
795                                         BUS_NOTIFY_ADD_DEVICE);
796                         if (!info) {
797                                 pci_dev_put(dev);
798                                 return dmar_dev_scope_status;
799                         } else {
800                                 dmar_pci_bus_add_dev(info);
801                                 dmar_free_pci_notify_info(info);
802                         }
803                 }
804         }
805
806         return dmar_dev_scope_status;
807 }
808
809 void __init dmar_register_bus_notifier(void)
810 {
811         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
812 }
813
814
815 int __init dmar_table_init(void)
816 {
817         static int dmar_table_initialized;
818         int ret;
819
820         if (dmar_table_initialized == 0) {
821                 ret = parse_dmar_table();
822                 if (ret < 0) {
823                         if (ret != -ENODEV)
824                                 pr_info("Parse DMAR table failure.\n");
825                 } else  if (list_empty(&dmar_drhd_units)) {
826                         pr_info("No DMAR devices found\n");
827                         ret = -ENODEV;
828                 }
829
830                 if (ret < 0)
831                         dmar_table_initialized = ret;
832                 else
833                         dmar_table_initialized = 1;
834         }
835
836         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
837 }
838
839 static void warn_invalid_dmar(u64 addr, const char *message)
840 {
841         pr_warn_once(FW_BUG
842                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
843                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
844                 addr, message,
845                 dmi_get_system_info(DMI_BIOS_VENDOR),
846                 dmi_get_system_info(DMI_BIOS_VERSION),
847                 dmi_get_system_info(DMI_PRODUCT_VERSION));
848         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
849 }
850
851 static int __ref
852 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
853 {
854         struct acpi_dmar_hardware_unit *drhd;
855         void __iomem *addr;
856         u64 cap, ecap;
857
858         drhd = (void *)entry;
859         if (!drhd->address) {
860                 warn_invalid_dmar(0, "");
861                 return -EINVAL;
862         }
863
864         if (arg)
865                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
866         else
867                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
868         if (!addr) {
869                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
870                 return -EINVAL;
871         }
872
873         cap = dmar_readq(addr + DMAR_CAP_REG);
874         ecap = dmar_readq(addr + DMAR_ECAP_REG);
875
876         if (arg)
877                 iounmap(addr);
878         else
879                 early_iounmap(addr, VTD_PAGE_SIZE);
880
881         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
882                 warn_invalid_dmar(drhd->address, " returns all ones");
883                 return -EINVAL;
884         }
885
886         return 0;
887 }
888
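/*
 * Early detection of DMA remapping hardware: map the DMAR table and
 * sanity check each DRHD's registers.  On success iommu_detected is set
 * (unless the IOMMU is disabled and there is no platform opt-in) and the
 * x86 iommu_init hook is installed.  The table mapping is dropped here
 * and re-acquired later by parse_dmar_table().
 */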
889 int __init detect_intel_iommu(void)
890 {
891         int ret;
892         struct dmar_res_callback validate_drhd_cb = {
893                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
894                 .ignore_unhandled = true,
895         };
896
897         down_write(&dmar_global_lock);
898         ret = dmar_table_detect();
899         if (!ret)
900                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
901                                            &validate_drhd_cb);
902         if (!ret && !no_iommu && !iommu_detected &&
903             (!dmar_disabled || dmar_platform_optin())) {
904                 iommu_detected = 1;
905                 /* Make sure ACS will be enabled */
906                 pci_request_acs();
907         }
908
909 #ifdef CONFIG_X86
910         if (!ret)
911                 x86_init.iommu.iommu_init = intel_iommu_init;
912 #endif
913
914         if (dmar_tbl) {
915                 acpi_put_table(dmar_tbl);
916                 dmar_tbl = NULL;
917         }
918         up_write(&dmar_global_lock);
919
920         return ret ? ret : 1;
921 }
922
923 static void unmap_iommu(struct intel_iommu *iommu)
924 {
925         iounmap(iommu->reg);
926         release_mem_region(iommu->reg_phys, iommu->reg_size);
927 }
928
929 /**
930  * map_iommu: map the iommu's registers
931  * @iommu: the iommu to map
932  * @phys_addr: the physical address of the base register
933  *
934  * Memory map the iommu's registers.  Start w/ a single page, and
935  * possibly expand if that turns out to be insufficient.
936  */
937 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
938 {
939         int map_size, err=0;
940
941         iommu->reg_phys = phys_addr;
942         iommu->reg_size = VTD_PAGE_SIZE;
943
944         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
945                 pr_err("Can't reserve memory\n");
946                 err = -EBUSY;
947                 goto out;
948         }
949
950         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
951         if (!iommu->reg) {
952                 pr_err("Can't map the region\n");
953                 err = -ENOMEM;
954                 goto release;
955         }
956
957         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
958         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
959
960         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
961                 err = -EINVAL;
962                 warn_invalid_dmar(phys_addr, " returns all ones");
963                 goto unmap;
964         }
965
966         /* the registers might be more than one page */
967         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
968                          cap_max_fault_reg_offset(iommu->cap));
969         map_size = VTD_PAGE_ALIGN(map_size);
970         if (map_size > iommu->reg_size) {
971                 iounmap(iommu->reg);
972                 release_mem_region(iommu->reg_phys, iommu->reg_size);
973                 iommu->reg_size = map_size;
974                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
975                                         iommu->name)) {
976                         pr_err("Can't reserve memory\n");
977                         err = -EBUSY;
978                         goto out;
979                 }
980                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
981                 if (!iommu->reg) {
982                         pr_err("Can't map the region\n");
983                         err = -ENOMEM;
984                         goto release;
985                 }
986         }
987         err = 0;
988         goto out;
989
990 unmap:
991         iounmap(iommu->reg);
992 release:
993         release_mem_region(iommu->reg_phys, iommu->reg_size);
994 out:
995         return err;
996 }
997
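/*
 * Allocate a unique sequence number for this IOMMU from the dmar_seq_ids
 * bitmap and derive its "dmarN" name from it.  Returns -1 when all
 * DMAR_UNITS_SUPPORTED ids are in use.
 */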
998 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
999 {
1000         iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1001                                             DMAR_UNITS_SUPPORTED);
1002         if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1003                 iommu->seq_id = -1;
1004         } else {
1005                 set_bit(iommu->seq_id, dmar_seq_ids);
1006                 sprintf(iommu->name, "dmar%d", iommu->seq_id);
1007         }
1008
1009         return iommu->seq_id;
1010 }
1011
1012 static void dmar_free_seq_id(struct intel_iommu *iommu)
1013 {
1014         if (iommu->seq_id >= 0) {
1015                 clear_bit(iommu->seq_id, dmar_seq_ids);
1016                 iommu->seq_id = -1;
1017         }
1018 }
1019
1020 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1021 {
1022         struct intel_iommu *iommu;
1023         u32 ver, sts;
1024         int agaw = -1;
1025         int msagaw = -1;
1026         int err;
1027
1028         if (!drhd->reg_base_addr) {
1029                 warn_invalid_dmar(0, "");
1030                 return -EINVAL;
1031         }
1032
1033         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1034         if (!iommu)
1035                 return -ENOMEM;
1036
1037         if (dmar_alloc_seq_id(iommu) < 0) {
1038                 pr_err("Failed to allocate seq_id\n");
1039                 err = -ENOSPC;
1040                 goto error;
1041         }
1042
1043         err = map_iommu(iommu, drhd->reg_base_addr);
1044         if (err) {
1045                 pr_err("Failed to map %s\n", iommu->name);
1046                 goto error_free_seq_id;
1047         }
1048
1049         err = -EINVAL;
1050         if (cap_sagaw(iommu->cap) == 0) {
1051                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1052                         iommu->name);
1053                 drhd->ignored = 1;
1054         }
1055
1056         if (!drhd->ignored) {
1057                 agaw = iommu_calculate_agaw(iommu);
1058                 if (agaw < 0) {
1059                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1060                                iommu->seq_id);
1061                         drhd->ignored = 1;
1062                 }
1063         }
1064         if (!drhd->ignored) {
1065                 msagaw = iommu_calculate_max_sagaw(iommu);
1066                 if (msagaw < 0) {
1067                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1068                                iommu->seq_id);
1069                         drhd->ignored = 1;
1070                         agaw = -1;
1071                 }
1072         }
1073         iommu->agaw = agaw;
1074         iommu->msagaw = msagaw;
1075         iommu->segment = drhd->segment;
1076
1077         iommu->node = NUMA_NO_NODE;
1078
1079         ver = readl(iommu->reg + DMAR_VER_REG);
1080         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1081                 iommu->name,
1082                 (unsigned long long)drhd->reg_base_addr,
1083                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1084                 (unsigned long long)iommu->cap,
1085                 (unsigned long long)iommu->ecap);
1086
1087         /* Reflect status in gcmd */
1088         sts = readl(iommu->reg + DMAR_GSTS_REG);
1089         if (sts & DMA_GSTS_IRES)
1090                 iommu->gcmd |= DMA_GCMD_IRE;
1091         if (sts & DMA_GSTS_TES)
1092                 iommu->gcmd |= DMA_GCMD_TE;
1093         if (sts & DMA_GSTS_QIES)
1094                 iommu->gcmd |= DMA_GCMD_QIE;
1095
1096         raw_spin_lock_init(&iommu->register_lock);
1097
1098         /*
1099          * This is only for hotplug; at boot time intel_iommu_enabled won't
1100          * be set yet. When intel_iommu_init() runs, it registers the units
1101          * present at boot time, then sets intel_iommu_enabled.
1102          */
1103         if (intel_iommu_enabled && !drhd->ignored) {
1104                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1105                                              intel_iommu_groups,
1106                                              "%s", iommu->name);
1107                 if (err)
1108                         goto err_unmap;
1109
1110                 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1111
1112                 err = iommu_device_register(&iommu->iommu);
1113                 if (err)
1114                         goto err_sysfs;
1115         }
1116
1117         drhd->iommu = iommu;
1118         iommu->drhd = drhd;
1119
1120         return 0;
1121
1122 err_sysfs:
1123         iommu_device_sysfs_remove(&iommu->iommu);
1124 err_unmap:
1125         unmap_iommu(iommu);
1126 error_free_seq_id:
1127         dmar_free_seq_id(iommu);
1128 error:
1129         kfree(iommu);
1130         return err;
1131 }
1132
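/*
 * Tear down an IOMMU set up by alloc_iommu(): unregister it from the
 * IOMMU core, release its interrupts, free the queued invalidation data
 * structures, unmap its registers and release its sequence number.
 */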
1133 static void free_iommu(struct intel_iommu *iommu)
1134 {
1135         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1136                 iommu_device_unregister(&iommu->iommu);
1137                 iommu_device_sysfs_remove(&iommu->iommu);
1138         }
1139
1140         if (iommu->irq) {
1141                 if (iommu->pr_irq) {
1142                         free_irq(iommu->pr_irq, iommu);
1143                         dmar_free_hwirq(iommu->pr_irq);
1144                         iommu->pr_irq = 0;
1145                 }
1146                 free_irq(iommu->irq, iommu);
1147                 dmar_free_hwirq(iommu->irq);
1148                 iommu->irq = 0;
1149         }
1150
1151         if (iommu->qi) {
1152                 free_page((unsigned long)iommu->qi->desc);
1153                 kfree(iommu->qi->desc_status);
1154                 kfree(iommu->qi);
1155         }
1156
1157         if (iommu->reg)
1158                 unmap_iommu(iommu);
1159
1160         dmar_free_seq_id(iommu);
1161         kfree(iommu);
1162 }
1163
1164 /*
1165  * Reclaim all the submitted descriptors which have completed their work.
1166  */
1167 static inline void reclaim_free_desc(struct q_inval *qi)
1168 {
1169         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1170                qi->desc_status[qi->free_tail] == QI_ABORT) {
1171                 qi->desc_status[qi->free_tail] = QI_FREE;
1172                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1173                 qi->free_cnt++;
1174         }
1175 }
1176
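/*
 * Check the fault status register for queued invalidation errors affecting
 * the descriptor at @index.  An Invalidation Queue Error on this descriptor
 * is patched over with the wait descriptor and reported as -EINVAL; an
 * Invalidation Time-out Error aborts the pending wait descriptors and asks
 * the caller to retry with -EAGAIN; an Invalidation Completion Error is
 * simply cleared.
 */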
1177 static int qi_check_fault(struct intel_iommu *iommu, int index)
1178 {
1179         u32 fault;
1180         int head, tail;
1181         struct q_inval *qi = iommu->qi;
1182         int wait_index = (index + 1) % QI_LENGTH;
1183         int shift = qi_shift(iommu);
1184
1185         if (qi->desc_status[wait_index] == QI_ABORT)
1186                 return -EAGAIN;
1187
1188         fault = readl(iommu->reg + DMAR_FSTS_REG);
1189
1190         /*
1191          * If IQE happens, the head points to the descriptor associated
1192          * with the error. No new descriptors are fetched until the IQE
1193          * is cleared.
1194          */
1195         if (fault & DMA_FSTS_IQE) {
1196                 head = readl(iommu->reg + DMAR_IQH_REG);
1197                 if ((head >> shift) == index) {
1198                         struct qi_desc *desc = qi->desc + head;
1199
1200                         /*
1201                          * desc->qw2 and desc->qw3 are either reserved or
1202                          * used by software as private data. We won't print
1203                          * out these two qw's for security consideration.
1204                          */
1205                         pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1206                                (unsigned long long)desc->qw0,
1207                                (unsigned long long)desc->qw1);
1208                         memcpy(desc, qi->desc + (wait_index << shift),
1209                                1 << shift);
1210                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1211                         return -EINVAL;
1212                 }
1213         }
1214
1215         /*
1216          * If ITE happens, all pending wait_desc commands are aborted.
1217          * No new descriptors are fetched until the ITE is cleared.
1218          */
1219         if (fault & DMA_FSTS_ITE) {
1220                 head = readl(iommu->reg + DMAR_IQH_REG);
1221                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1222                 head |= 1;
1223                 tail = readl(iommu->reg + DMAR_IQT_REG);
1224                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1225
1226                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1227
1228                 do {
1229                         if (qi->desc_status[head] == QI_IN_USE)
1230                                 qi->desc_status[head] = QI_ABORT;
1231                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1232                 } while (head != tail);
1233
1234                 if (qi->desc_status[wait_index] == QI_ABORT)
1235                         return -EAGAIN;
1236         }
1237
1238         if (fault & DMA_FSTS_ICE)
1239                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1240
1241         return 0;
1242 }
1243
1244 /*
1245  * Submit the queued invalidation descriptor to the remapping
1246  * hardware unit and wait for its completion.
1247  */
1248 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
1249 {
1250         int rc;
1251         struct q_inval *qi = iommu->qi;
1252         int offset, shift, length;
1253         struct qi_desc wait_desc;
1254         int wait_index, index;
1255         unsigned long flags;
1256
1257         if (!qi)
1258                 return 0;
1259
1260 restart:
1261         rc = 0;
1262
1263         raw_spin_lock_irqsave(&qi->q_lock, flags);
1264         while (qi->free_cnt < 3) {
1265                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1266                 cpu_relax();
1267                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1268         }
1269
1270         index = qi->free_head;
1271         wait_index = (index + 1) % QI_LENGTH;
1272         shift = qi_shift(iommu);
1273         length = 1 << shift;
1274
1275         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
1276
1277         offset = index << shift;
1278         memcpy(qi->desc + offset, desc, length);
1279         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1280                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1281         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1282         wait_desc.qw2 = 0;
1283         wait_desc.qw3 = 0;
1284
1285         offset = wait_index << shift;
1286         memcpy(qi->desc + offset, &wait_desc, length);
1287
1288         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
1289         qi->free_cnt -= 2;
1290
1291         /*
1292          * update the HW tail register indicating the presence of
1293          * new descriptors.
1294          */
1295         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1296
1297         while (qi->desc_status[wait_index] != QI_DONE) {
1298                 /*
1299                  * We will leave the interrupts disabled, to prevent the interrupt
1300                  * context from queueing another cmd while a cmd is already submitted
1301                  * and waiting for completion on this CPU. This is to avoid
1302                  * a deadlock where the interrupt context can wait indefinitely
1303                  * for free slots in the queue.
1304                  */
1305                 rc = qi_check_fault(iommu, index);
1306                 if (rc)
1307                         break;
1308
1309                 raw_spin_unlock(&qi->q_lock);
1310                 cpu_relax();
1311                 raw_spin_lock(&qi->q_lock);
1312         }
1313
1314         qi->desc_status[index] = QI_DONE;
1315
1316         reclaim_free_desc(qi);
1317         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1318
1319         if (rc == -EAGAIN)
1320                 goto restart;
1321
1322         return rc;
1323 }
1324
1325 /*
1326  * Flush the global interrupt entry cache.
1327  */
1328 void qi_global_iec(struct intel_iommu *iommu)
1329 {
1330         struct qi_desc desc;
1331
1332         desc.qw0 = QI_IEC_TYPE;
1333         desc.qw1 = 0;
1334         desc.qw2 = 0;
1335         desc.qw3 = 0;
1336
1337         /* should never fail */
1338         qi_submit_sync(&desc, iommu);
1339 }
1340
1341 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1342                       u64 type)
1343 {
1344         struct qi_desc desc;
1345
1346         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1347                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1348         desc.qw1 = 0;
1349         desc.qw2 = 0;
1350         desc.qw3 = 0;
1351
1352         qi_submit_sync(&desc, iommu);
1353 }
1354
1355 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1356                     unsigned int size_order, u64 type)
1357 {
1358         u8 dw = 0, dr = 0;
1359
1360         struct qi_desc desc;
1361         int ih = 0;
1362
1363         if (cap_write_drain(iommu->cap))
1364                 dw = 1;
1365
1366         if (cap_read_drain(iommu->cap))
1367                 dr = 1;
1368
1369         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1370                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1371         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1372                 | QI_IOTLB_AM(size_order);
1373         desc.qw2 = 0;
1374         desc.qw3 = 0;
1375
1376         qi_submit_sync(&desc, iommu);
1377 }
1378
1379 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1380                         u16 qdep, u64 addr, unsigned mask)
1381 {
1382         struct qi_desc desc;
1383
1384         if (mask) {
1385                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1386                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1387         } else
1388                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1389
1390         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1391                 qdep = 0;
1392
1393         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1394                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1395         desc.qw2 = 0;
1396         desc.qw3 = 0;
1397
1398         qi_submit_sync(&desc, iommu);
1399 }
1400
1401 /*
1402  * Disable Queued Invalidation interface.
1403  */
1404 void dmar_disable_qi(struct intel_iommu *iommu)
1405 {
1406         unsigned long flags;
1407         u32 sts;
1408         cycles_t start_time = get_cycles();
1409
1410         if (!ecap_qis(iommu->ecap))
1411                 return;
1412
1413         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1414
1415         sts =  readl(iommu->reg + DMAR_GSTS_REG);
1416         if (!(sts & DMA_GSTS_QIES))
1417                 goto end;
1418
1419         /*
1420          * Give a chance to HW to complete the pending invalidation requests.
1421          */
1422         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1423                 readl(iommu->reg + DMAR_IQH_REG)) &&
1424                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1425                 cpu_relax();
1426
1427         iommu->gcmd &= ~DMA_GCMD_QIE;
1428         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1429
1430         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1431                       !(sts & DMA_GSTS_QIES), sts);
1432 end:
1433         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1434 }
1435
1436 /*
1437  * Enable queued invalidation.
1438  */
1439 static void __dmar_enable_qi(struct intel_iommu *iommu)
1440 {
1441         u32 sts;
1442         unsigned long flags;
1443         struct q_inval *qi = iommu->qi;
1444         u64 val = virt_to_phys(qi->desc);
1445
1446         qi->free_head = qi->free_tail = 0;
1447         qi->free_cnt = QI_LENGTH;
1448
1449         /*
1450          * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1451          * is present.
1452          */
1453         if (ecap_smts(iommu->ecap))
1454                 val |= (1 << 11) | 1;
1455
1456         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1457
1458         /* write zero to the tail reg */
1459         writel(0, iommu->reg + DMAR_IQT_REG);
1460
1461         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1462
1463         iommu->gcmd |= DMA_GCMD_QIE;
1464         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1465
1466         /* Make sure hardware complete it */
1467         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1468
1469         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1470 }
1471
1472 /*
1473  * Enable Queued Invalidation interface. This is a must to support
1474  * interrupt-remapping. Also used by DMA-remapping, which replaces
1475  * register based IOTLB invalidation.
1476  */
1477 int dmar_enable_qi(struct intel_iommu *iommu)
1478 {
1479         struct q_inval *qi;
1480         struct page *desc_page;
1481
1482         if (!ecap_qis(iommu->ecap))
1483                 return -ENOENT;
1484
1485         /*
1486          * queued invalidation is already set up and enabled.
1487          */
1488         if (iommu->qi)
1489                 return 0;
1490
1491         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1492         if (!iommu->qi)
1493                 return -ENOMEM;
1494
1495         qi = iommu->qi;
1496
1497         /*
1498          * Need two pages to accommodate 256 descriptors of 256 bits each
1499          * if the remapping hardware supports scalable mode translation.
1500          */
1501         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1502                                      !!ecap_smts(iommu->ecap));
1503         if (!desc_page) {
1504                 kfree(qi);
1505                 iommu->qi = NULL;
1506                 return -ENOMEM;
1507         }
1508
1509         qi->desc = page_address(desc_page);
1510
1511         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1512         if (!qi->desc_status) {
1513                 free_page((unsigned long) qi->desc);
1514                 kfree(qi);
1515                 iommu->qi = NULL;
1516                 return -ENOMEM;
1517         }
1518
1519         raw_spin_lock_init(&qi->q_lock);
1520
1521         __dmar_enable_qi(iommu);
1522
1523         return 0;
1524 }
1525
1526 /* IOMMU interrupt handling. Most of it is MSI-like. */
1527
1528 enum faulttype {
1529         DMA_REMAP,
1530         INTR_REMAP,
1531         UNKNOWN,
1532 };
1533
1534 static const char * const dma_remap_fault_reasons[] =
1535 {
1536         "Software",
1537         "Present bit in root entry is clear",
1538         "Present bit in context entry is clear",
1539         "Invalid context entry",
1540         "Access beyond MGAW",
1541         "PTE Write access is not set",
1542         "PTE Read access is not set",
1543         "Next page table ptr is invalid",
1544         "Root table address invalid",
1545         "Context table ptr is invalid",
1546         "non-zero reserved fields in RTP",
1547         "non-zero reserved fields in CTP",
1548         "non-zero reserved fields in PTE",
1549         "PCE for translation request specifies blocking",
1550 };
1551
1552 static const char * const dma_remap_sm_fault_reasons[] = {
1553         "SM: Invalid Root Table Address",
1554         "SM: TTM 0 for request with PASID",
1555         "SM: TTM 0 for page group request",
1556         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1557         "SM: Error attempting to access Root Entry",
1558         "SM: Present bit in Root Entry is clear",
1559         "SM: Non-zero reserved field set in Root Entry",
1560         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1561         "SM: Error attempting to access Context Entry",
1562         "SM: Present bit in Context Entry is clear",
1563         "SM: Non-zero reserved field set in the Context Entry",
1564         "SM: Invalid Context Entry",
1565         "SM: DTE field in Context Entry is clear",
1566         "SM: PASID Enable field in Context Entry is clear",
1567         "SM: PASID is larger than the max in Context Entry",
1568         "SM: PRE field in Context-Entry is clear",
1569         "SM: RID_PASID field error in Context-Entry",
1570         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1571         "SM: Error attempting to access the PASID Directory Entry",
1572         "SM: Present bit in Directory Entry is clear",
1573         "SM: Non-zero reserved field set in PASID Directory Entry",
1574         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1575         "SM: Error attempting to access PASID Table Entry",
1576         "SM: Present bit in PASID Table Entry is clear",
1577         "SM: Non-zero reserved field set in PASID Table Entry",
1578         "SM: Invalid Scalable-Mode PASID Table Entry",
1579         "SM: ERE field is clear in PASID Table Entry",
1580         "SM: SRE field is clear in PASID Table Entry",
1581         "Unknown", "Unknown",/* 0x5E-0x5F */
1582         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1583         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1584         "SM: Error attempting to access first-level paging entry",
1585         "SM: Present bit in first-level paging entry is clear",
1586         "SM: Non-zero reserved field set in first-level paging entry",
1587         "SM: Error attempting to access FL-PML4 entry",
1588         "SM: First-level entry address beyond MGAW in Nested translation",
1589         "SM: Read permission error in FL-PML4 entry in Nested translation",
1590         "SM: Read permission error in first-level paging entry in Nested translation",
1591         "SM: Write permission error in first-level paging entry in Nested translation",
1592         "SM: Error attempting to access second-level paging entry",
1593         "SM: Read/Write permission error in second-level paging entry",
1594         "SM: Non-zero reserved field set in second-level paging entry",
1595         "SM: Invalid second-level page table pointer",
1596         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1597         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1598         "SM: Address in first-level translation is not canonical",
1599         "SM: U/S set 0 for first-level translation with user privilege",
1600         "SM: No execute permission for request with PASID and ER=1",
1601         "SM: Address beyond the DMA hardware max",
1602         "SM: Second-level entry address beyond the max",
1603         "SM: No write permission for Write/AtomicOp request",
1604         "SM: No read permission for Read/AtomicOp request",
1605         "SM: Invalid address-interrupt address",
1606         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1607         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1608 };
1609
1610 static const char * const irq_remap_fault_reasons[] =
1611 {
1612         "Detected reserved fields in the decoded interrupt-remapped request",
1613         "Interrupt index exceeded the interrupt-remapping table size",
1614         "Present field in the IRTE entry is clear",
1615         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1616         "Detected reserved fields in the IRTE entry",
1617         "Blocked a compatibility format interrupt request",
1618         "Blocked an interrupt request due to source-id verification failure",
1619 };
1620
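     /*
      * Fault reason codes 0x20-0x26 are interrupt-remapping faults and codes
      * from 0x30 upward are scalable-mode faults; lower codes index the
      * legacy DMA-remapping table.
      */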
1621 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1622 {
1623         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1624                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1625                 *fault_type = INTR_REMAP;
1626                 return irq_remap_fault_reasons[fault_reason - 0x20];
1627         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1628                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1629                 *fault_type = DMA_REMAP;
1630                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1631         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1632                 *fault_type = DMA_REMAP;
1633                 return dma_remap_fault_reasons[fault_reason];
1634         } else {
1635                 *fault_type = UNKNOWN;
1636                 return "Unknown";
1637         }
1638 }
1639
1640
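     /*
      * Select the MSI register block for this interrupt: fault events use
      * DMAR_FECTL_REG, page request events use DMAR_PECTL_REG.
      */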
1641 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1642 {
1643         if (iommu->irq == irq)
1644                 return DMAR_FECTL_REG;
1645         else if (iommu->pr_irq == irq)
1646                 return DMAR_PECTL_REG;
1647         else
1648                 BUG();
1649 }
1650
1651 void dmar_msi_unmask(struct irq_data *data)
1652 {
1653         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1654         int reg = dmar_msi_reg(iommu, data->irq);
1655         unsigned long flag;
1656
1657         /* unmask it */
1658         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1659         writel(0, iommu->reg + reg);
1660         /* Read back the register to flush the posted write */
1661         readl(iommu->reg + reg);
1662         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1663 }
1664
1665 void dmar_msi_mask(struct irq_data *data)
1666 {
1667         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1668         int reg = dmar_msi_reg(iommu, data->irq);
1669         unsigned long flag;
1670
1671         /* mask it */
1672         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1673         writel(DMA_FECTL_IM, iommu->reg + reg);
1674         /* Read back the register to flush the posted write */
1675         readl(iommu->reg + reg);
1676         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1677 }
1678
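     /*
      * Each event interrupt uses a block of four registers: control at +0,
      * data at +4, address at +8 and upper address at +12.
      */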
1679 void dmar_msi_write(int irq, struct msi_msg *msg)
1680 {
1681         struct intel_iommu *iommu = irq_get_handler_data(irq);
1682         int reg = dmar_msi_reg(iommu, irq);
1683         unsigned long flag;
1684
1685         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1686         writel(msg->data, iommu->reg + reg + 4);
1687         writel(msg->address_lo, iommu->reg + reg + 8);
1688         writel(msg->address_hi, iommu->reg + reg + 12);
1689         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1690 }
1691
1692 void dmar_msi_read(int irq, struct msi_msg *msg)
1693 {
1694         struct intel_iommu *iommu = irq_get_handler_data(irq);
1695         int reg = dmar_msi_reg(iommu, irq);
1696         unsigned long flag;
1697
1698         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1699         msg->data = readl(iommu->reg + reg + 4);
1700         msg->address_lo = readl(iommu->reg + reg + 8);
1701         msg->address_hi = readl(iommu->reg + reg + 12);
1702         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1703 }
1704
1705 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1706                 u8 fault_reason, int pasid, u16 source_id,
1707                 unsigned long long addr)
1708 {
1709         const char *reason;
1710         int fault_type;
1711
1712         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1713
1714         if (fault_type == INTR_REMAP)
1715                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1716                         source_id >> 8, PCI_SLOT(source_id & 0xFF),
1717                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1718                         fault_reason, reason);
1719         else
1720                 pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1721                        type ? "DMA Read" : "DMA Write",
1722                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1723                        PCI_FUNC(source_id & 0xFF), pasid, addr,
1724                        fault_reason, reason);
1725         return 0;
1726 }
1727
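     /* Each fault recording register is 128 bits (16 bytes) wide. */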
1728 #define PRIMARY_FAULT_REG_LEN (16)
1729 irqreturn_t dmar_fault(int irq, void *dev_id)
1730 {
1731         struct intel_iommu *iommu = dev_id;
1732         int reg, fault_index;
1733         u32 fault_status;
1734         unsigned long flag;
1735         static DEFINE_RATELIMIT_STATE(rs,
1736                                       DEFAULT_RATELIMIT_INTERVAL,
1737                                       DEFAULT_RATELIMIT_BURST);
1738
1739         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1740         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1741         if (fault_status && __ratelimit(&rs))
1742                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1743
1744         /* TBD: advanced fault logging is ignored for now */
1745         if (!(fault_status & DMA_FSTS_PPF))
1746                 goto unlock_exit;
1747
1748         fault_index = dma_fsts_fault_record_index(fault_status);
1749         reg = cap_fault_reg_offset(iommu->cap);
1750         while (1) {
1751                 /* Disable printing, simply clear the fault when ratelimited */
1752                 bool ratelimited = !__ratelimit(&rs);
1753                 u8 fault_reason;
1754                 u16 source_id;
1755                 u64 guest_addr;
1756                 int type, pasid;
1757                 u32 data;
1758                 bool pasid_present;
1759
1760                 /* Read the upper 32 bits of the fault recording register */
1761                 data = readl(iommu->reg + reg +
1762                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1763                 if (!(data & DMA_FRCD_F))
1764                         break;
1765
1766                 if (!ratelimited) {
1767                         fault_reason = dma_frcd_fault_reason(data);
1768                         type = dma_frcd_type(data);
1769
1770                         pasid = dma_frcd_pasid_value(data);
1771                         data = readl(iommu->reg + reg +
1772                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
1773                         source_id = dma_frcd_source_id(data);
1774
1775                         pasid_present = dma_frcd_pasid_present(data);
1776                         guest_addr = dmar_readq(iommu->reg + reg +
1777                                         fault_index * PRIMARY_FAULT_REG_LEN);
1778                         guest_addr = dma_frcd_page_addr(guest_addr);
1779                 }
1780
1781                 /* clear the fault */
1782                 writel(DMA_FRCD_F, iommu->reg + reg +
1783                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1784
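                     /* Drop the register lock while reporting the fault. */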
1785                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1786
1787                 if (!ratelimited)
1788                         /* Use PASID value -1 if no PASID is present */
1789                         dmar_fault_do_one(iommu, type, fault_reason,
1790                                           pasid_present ? pasid : -1,
1791                                           source_id, guest_addr);
1792
1793                 fault_index++;
1794                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1795                         fault_index = 0;
1796                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1797         }
1798
1799         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1800                iommu->reg + DMAR_FSTS_REG);
1801
1802 unlock_exit:
1803         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1804         return IRQ_HANDLED;
1805 }
1806
1807 int dmar_set_interrupt(struct intel_iommu *iommu)
1808 {
1809         int irq, ret;
1810
1811         /*
1812          * Check if the fault interrupt is already initialized.
1813          */
1814         if (iommu->irq)
1815                 return 0;
1816
1817         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1818         if (irq > 0) {
1819                 iommu->irq = irq;
1820         } else {
1821                 pr_err("No free IRQ vectors\n");
1822                 return -EINVAL;
1823         }
1824
1825         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1826         if (ret)
1827                 pr_err("Can't request irq\n");
1828         return ret;
1829 }
1830
1831 int __init enable_drhd_fault_handling(void)
1832 {
1833         struct dmar_drhd_unit *drhd;
1834         struct intel_iommu *iommu;
1835
1836         /*
1837          * Enable fault control interrupt.
1838          */
1839         for_each_iommu(iommu, drhd) {
1840                 u32 fault_status;
1841                 int ret = dmar_set_interrupt(iommu);
1842
1843                 if (ret) {
1844                         pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1845                                (unsigned long long)drhd->reg_base_addr, ret);
1846                         return -1;
1847                 }
1848
1849                 /*
1850                  * Clear any previous faults.
1851                  */
1852                 dmar_fault(iommu->irq, iommu);
1853                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1854                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1855         }
1856
1857         return 0;
1858 }
1859
1860 /*
1861  * Re-enable Queued Invalidation interface.
1862  */
1863 int dmar_reenable_qi(struct intel_iommu *iommu)
1864 {
1865         if (!ecap_qis(iommu->ecap))
1866                 return -ENOENT;
1867
1868         if (!iommu->qi)
1869                 return -ENOENT;
1870
1871         /*
1872          * First disable queued invalidation.
1873          */
1874         dmar_disable_qi(iommu);
1875         /*
1876          * Then enable queued invalidation again. Since there are no pending
1877          * invalidation requests now, it's safe to re-enable queued
1878          * invalidation.
1879          */
1880         __dmar_enable_qi(iommu);
1881
1882         return 0;
1883 }
1884
1885 /*
1886  * Check interrupt remapping support in DMAR table description.
1887  */
1888 int __init dmar_ir_support(void)
1889 {
1890         struct acpi_table_dmar *dmar;
1891         dmar = (struct acpi_table_dmar *)dmar_tbl;
1892         if (!dmar)
1893                 return 0;
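             /* Flags bit 0 (INTR_REMAP) indicates interrupt remapping support. */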
1894         return dmar->flags & 0x1;
1895 }
1896
1897 /* Check whether DMAR units are in use */
1898 static inline bool dmar_in_use(void)
1899 {
1900         return irq_remapping_enabled || intel_iommu_enabled;
1901 }
1902
1903 static int __init dmar_free_unused_resources(void)
1904 {
1905         struct dmar_drhd_unit *dmaru, *dmaru_n;
1906
1907         if (dmar_in_use())
1908                 return 0;
1909
1910         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
1911                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
1912
1913         down_write(&dmar_global_lock);
1914         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
1915                 list_del(&dmaru->list);
1916                 dmar_free_drhd(dmaru);
1917         }
1918         up_write(&dmar_global_lock);
1919
1920         return 0;
1921 }
1922
1923 late_initcall(dmar_free_unused_resources);
1924 IOMMU_INIT_POST(detect_intel_iommu);
1925
1926 /*
1927  * DMAR Hotplug Support
1928  * For more details, please refer to Intel(R) Virtualization Technology
1929  * for Directed I/O Architecture Specification, Rev 2.2, Section 8.8
1930  * "Remapping Hardware Unit Hot Plug".
1931  */
1932 static guid_t dmar_hp_guid =
1933         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
1934                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
1935
1936 /*
1937  * Currently there's only one revision and the BIOS will not check the revision ID,
1938  * so use 0 for safety.
1939  */
1940 #define DMAR_DSM_REV_ID                 0
1941 #define DMAR_DSM_FUNC_DRHD              1
1942 #define DMAR_DSM_FUNC_ATSR              2
1943 #define DMAR_DSM_FUNC_RHSA              3
1944
1945 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
1946 {
1947         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
1948 }
1949
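     /*
      * Evaluate DMAR hot-plug _DSM function @func and walk the remapping
      * structures in the returned buffer, dispatching entries of the
      * matching type to @handler.
      */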
1950 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
1951                                   dmar_res_handler_t handler, void *arg)
1952 {
1953         int ret = -ENODEV;
1954         union acpi_object *obj;
1955         struct acpi_dmar_header *start;
1956         struct dmar_res_callback callback;
1957         static int res_type[] = {
1958                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
1959                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
1960                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
1961         };
1962
1963         if (!dmar_detect_dsm(handle, func))
1964                 return 0;
1965
1966         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
1967                                       func, NULL, ACPI_TYPE_BUFFER);
1968         if (!obj)
1969                 return -ENODEV;
1970
1971         memset(&callback, 0, sizeof(callback));
1972         callback.cb[res_type[func]] = handler;
1973         callback.arg[res_type[func]] = arg;
1974         start = (struct acpi_dmar_header *)obj->buffer.pointer;
1975         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
1976
1977         ACPI_FREE(obj);
1978
1979         return ret;
1980 }
1981
1982 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
1983 {
1984         int ret;
1985         struct dmar_drhd_unit *dmaru;
1986
1987         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
1988         if (!dmaru)
1989                 return -ENODEV;
1990
1991         ret = dmar_ir_hotplug(dmaru, true);
1992         if (ret == 0)
1993                 ret = dmar_iommu_hotplug(dmaru, true);
1994
1995         return ret;
1996 }
1997
1998 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
1999 {
2000         int i, ret;
2001         struct device *dev;
2002         struct dmar_drhd_unit *dmaru;
2003
2004         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2005         if (!dmaru)
2006                 return 0;
2007
2008         /*
2009          * All PCI devices managed by this unit should have been destroyed.
2010          */
2011         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2012                 for_each_active_dev_scope(dmaru->devices,
2013                                           dmaru->devices_cnt, i, dev)
2014                         return -EBUSY;
2015         }
2016
2017         ret = dmar_ir_hotplug(dmaru, false);
2018         if (ret == 0)
2019                 ret = dmar_iommu_hotplug(dmaru, false);
2020
2021         return ret;
2022 }
2023
2024 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2025 {
2026         struct dmar_drhd_unit *dmaru;
2027
2028         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2029         if (dmaru) {
2030                 list_del_rcu(&dmaru->list);
2031                 synchronize_rcu();
2032                 dmar_free_drhd(dmaru);
2033         }
2034
2035         return 0;
2036 }
2037
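     /*
      * Parse and enable the DRHD, RHSA and ATSR structures reported by the
      * _DSM, unwinding in reverse order if any step fails.
      */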
2038 static int dmar_hotplug_insert(acpi_handle handle)
2039 {
2040         int ret;
2041         int drhd_count = 0;
2042
2043         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2044                                      &dmar_validate_one_drhd, (void *)1);
2045         if (ret)
2046                 goto out;
2047
2048         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2049                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2050         if (ret == 0 && drhd_count == 0) {
2051                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2052                 goto out;
2053         } else if (ret) {
2054                 goto release_drhd;
2055         }
2056
2057         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2058                                      &dmar_parse_one_rhsa, NULL);
2059         if (ret)
2060                 goto release_drhd;
2061
2062         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2063                                      &dmar_parse_one_atsr, NULL);
2064         if (ret)
2065                 goto release_atsr;
2066
2067         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2068                                      &dmar_hp_add_drhd, NULL);
2069         if (!ret)
2070                 return 0;
2071
2072         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2073                                &dmar_hp_remove_drhd, NULL);
2074 release_atsr:
2075         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2076                                &dmar_release_one_atsr, NULL);
2077 release_drhd:
2078         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2079                                &dmar_hp_release_drhd, NULL);
2080 out:
2081         return ret;
2082 }
2083
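     /*
      * Detach all DRHD units below the handle and release their resources;
      * re-add the units if detaching fails.
      */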
2084 static int dmar_hotplug_remove(acpi_handle handle)
2085 {
2086         int ret;
2087
2088         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2089                                      &dmar_check_one_atsr, NULL);
2090         if (ret)
2091                 return ret;
2092
2093         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2094                                      &dmar_hp_remove_drhd, NULL);
2095         if (ret == 0) {
2096                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2097                                                &dmar_release_one_atsr, NULL));
2098                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2099                                                &dmar_hp_release_drhd, NULL));
2100         } else {
2101                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2102                                        &dmar_hp_add_drhd, NULL);
2103         }
2104
2105         return ret;
2106 }
2107
2108 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2109                                        void *context, void **retval)
2110 {
2111         acpi_handle *phdl = retval;
2112
2113         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2114                 *phdl = handle;
2115                 return AE_CTRL_TERMINATE;
2116         }
2117
2118         return AE_OK;
2119 }
2120
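     /*
      * Find the ACPI object implementing the DMAR hot-plug _DSM (either the
      * handle itself or one of its descendants) and insert or remove the
      * corresponding remapping hardware units.
      */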
2121 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2122 {
2123         int ret;
2124         acpi_handle tmp = NULL;
2125         acpi_status status;
2126
2127         if (!dmar_in_use())
2128                 return 0;
2129
2130         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2131                 tmp = handle;
2132         } else {
2133                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2134                                              ACPI_UINT32_MAX,
2135                                              dmar_get_dsm_handle,
2136                                              NULL, NULL, &tmp);
2137                 if (ACPI_FAILURE(status)) {
2138                         pr_warn("Failed to locate _DSM method.\n");
2139                         return -ENXIO;
2140                 }
2141         }
2142         if (tmp == NULL)
2143                 return 0;
2144
2145         down_write(&dmar_global_lock);
2146         if (insert)
2147                 ret = dmar_hotplug_insert(tmp);
2148         else
2149                 ret = dmar_hotplug_remove(tmp);
2150         up_write(&dmar_global_lock);
2151
2152         return ret;
2153 }
2154
2155 int dmar_device_add(acpi_handle handle)
2156 {
2157         return dmar_device_hotplug(handle, true);
2158 }
2159
2160 int dmar_device_remove(acpi_handle handle)
2161 {
2162         return dmar_device_hotplug(handle, false);
2163 }
2164
2165 /*
2166  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2167  *
2168  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2169  * the ACPI DMAR table. This means that the platform boot firmware has made
2170  * sure no device can issue DMA outside of RMRR regions.
2171  */
2172 bool dmar_platform_optin(void)
2173 {
2174         struct acpi_table_dmar *dmar;
2175         acpi_status status;
2176         bool ret;
2177
2178         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2179                                 (struct acpi_table_header **)&dmar);
2180         if (ACPI_FAILURE(status))
2181                 return false;
2182
2183         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2184         acpi_put_table((struct acpi_table_header *)dmar);
2185
2186         return ret;
2187 }
2188 EXPORT_SYMBOL_GPL(dmar_platform_optin);