GNU Linux-libre 6.9.1-gnu
drivers/iommu/intel/svm.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);

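/*
 * PASID-private data, keyed by PASID value. xa_alloc() with
 * XA_LIMIT(pasid, pasid) stores @priv at exactly index @pasid and fails
 * if that slot is already occupied.
 */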
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
        return xa_alloc(&pasid_private_array, &pasid, priv,
                        XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
        xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
        return xa_load(&pasid_private_array, pasid);
}

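/* Return the intel_svm_dev bound to @dev in @svm's device list, or NULL. */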
static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
        struct intel_svm_dev *sdev = NULL, *t;

        rcu_read_lock();
        list_for_each_entry_rcu(t, &svm->devs, list) {
                if (t->dev == dev) {
                        sdev = t;
                        break;
                }
        }
        rcu_read_unlock();

        return sdev;
}

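/*
 * Allocate the page request queue for @iommu, hook up its interrupt and
 * I/O page fault queue, and program the queue address/head/tail registers.
 */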
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
        struct iopf_queue *iopfq;
        struct page *pages;
        int irq, ret;

        pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
        if (!pages) {
                pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
                        iommu->name);
                return -ENOMEM;
        }
        iommu->prq = page_address(pages);

        irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
        if (irq <= 0) {
                pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
                       iommu->name);
                ret = -EINVAL;
                goto free_prq;
        }
        iommu->pr_irq = irq;

        snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
                 "dmar%d-iopfq", iommu->seq_id);
        iopfq = iopf_queue_alloc(iommu->iopfq_name);
        if (!iopfq) {
                pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
                ret = -ENOMEM;
                goto free_hwirq;
        }
        iommu->iopf_queue = iopfq;

        snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

        ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
                                   iommu->prq_name, iommu);
        if (ret) {
                pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
                       iommu->name);
                goto free_iopfq;
        }
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

        init_completion(&iommu->prq_complete);

        return 0;

free_iopfq:
        iopf_queue_free(iommu->iopf_queue);
        iommu->iopf_queue = NULL;
free_hwirq:
        dmar_free_hwirq(irq);
        iommu->pr_irq = 0;
free_prq:
        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

        if (iommu->pr_irq) {
                free_irq(iommu->pr_irq, iommu);
                dmar_free_hwirq(iommu->pr_irq);
                iommu->pr_irq = 0;
        }

        if (iommu->iopf_queue) {
                iopf_queue_free(iommu->iopf_queue);
                iommu->iopf_queue = NULL;
        }

        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return 0;
}

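/*
 * SVM shares the CPU page tables with the device, so refuse to advertise it
 * when the CPU uses 1GB pages or 5-level paging that this IOMMU's first-level
 * translation cannot handle.
 */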
void intel_svm_check(struct intel_iommu *iommu)
{
        if (!pasid_supported(iommu))
                return;

        if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
            !cap_fl1gp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible 1GB page capability\n",
                       iommu->name);
                return;
        }

        if (cpu_feature_enabled(X86_FEATURE_LA57) &&
            !cap_fl5lp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible paging mode\n",
                       iommu->name);
                return;
        }

        iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
                                  struct intel_svm_dev *sdev,
                                  unsigned long address,
                                  unsigned long pages, int ih)
{
        struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

        if (WARN_ON(!pages))
                return;

        qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
        if (info->ats_enabled) {
                qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
                                         svm->pasid, sdev->qdep, address,
                                         order_base_2(pages));
                quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
                                          svm->pasid, sdev->qdep);
        }
}

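/*
 * The invalidation descriptors take a power-of-two page count, so split the
 * request into naturally aligned power-of-two chunks and flush each chunk.
 */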
static void intel_flush_svm_range_dev(struct intel_svm *svm,
                                      struct intel_svm_dev *sdev,
                                      unsigned long address,
                                      unsigned long pages, int ih)
{
        unsigned long shift = ilog2(__roundup_pow_of_two(pages));
        unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
        unsigned long start = ALIGN_DOWN(address, align);
        unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

        while (start < end) {
                __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
                start += align;
        }
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
                                unsigned long pages, int ih)
{
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
        rcu_read_unlock();
}

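/* Flush the entire PASID-tagged IOTLB and device TLB for every bound device. */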
static void intel_flush_svm_all(struct intel_svm *svm)
{
        struct device_domain_info *info;
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list) {
                info = dev_iommu_priv_get(sdev->dev);

                qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
                if (info->ats_enabled) {
                        qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
                                                 svm->pasid, sdev->qdep,
                                                 0, 64 - VTD_PAGE_SHIFT);
                        quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
                                                  svm->pasid, sdev->qdep);
                }
        }
        rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        if (start == 0 && end == -1UL) {
                intel_flush_svm_all(svm);
                return;
        }

        intel_flush_svm_range(svm, start,
                              (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
        struct intel_svm_dev *sdev;

        /*
         * This might end up being called from exit_mmap(), *before* the page
         * tables are cleared. And __mmu_notifier_release() will delete us from
         * the list of notifiers so that our arch_invalidate_secondary_tlbs()
         * callback doesn't get called when the page tables are cleared. So we
         * need to protect against hardware accessing those page tables.
         *
         * We do it by clearing the entry in the PASID table and then flushing
         * the IOTLB and the PASID table caches. This might upset hardware;
         * perhaps we'll want to point the PASID to a dummy PGD (like the zero
         * page) so that we end up taking a fault that the hardware really
         * *has* to handle gracefully without affecting other processes.
         */
        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
                                            svm->pasid, true);
        rcu_read_unlock();
}

static const struct mmu_notifier_ops intel_mmuops = {
        .release = intel_mm_release,
        .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
};

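/*
 * Look up the intel_svm and, if present, the intel_svm_dev bound to @dev for
 * @pasid. On a zero return, *rsvm and/or *rsdev may still be NULL if no
 * binding exists.
 */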
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
                             struct intel_svm **rsvm,
                             struct intel_svm_dev **rsdev)
{
        struct intel_svm_dev *sdev = NULL;
        struct intel_svm *svm;

        if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
                return -EINVAL;

        svm = pasid_private_find(pasid);
        if (IS_ERR(svm))
                return PTR_ERR(svm);

        if (!svm)
                goto out;

        /*
         * If we found svm for the PASID, there must be at least one device
         * bond.
         */
        if (WARN_ON(list_empty(&svm->devs)))
                return -EINVAL;
        sdev = svm_lookup_device_by_dev(svm, dev);

out:
        *rsvm = svm;
        *rsdev = sdev;

        return 0;
}

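/*
 * Bind @dev to the SVA domain's mm under @pasid: allocate the intel_svm and
 * register the mmu notifier on first use, then program a first-level PASID
 * table entry that shares the mm's page tables with the device.
 */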
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
                                   struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;
        struct mm_struct *mm = domain->mm;
        struct intel_svm_dev *sdev;
        struct intel_svm *svm;
        unsigned long sflags;
        int ret = 0;

        svm = pasid_private_find(pasid);
        if (!svm) {
                svm = kzalloc(sizeof(*svm), GFP_KERNEL);
                if (!svm)
                        return -ENOMEM;

                svm->pasid = pasid;
                svm->mm = mm;
                INIT_LIST_HEAD_RCU(&svm->devs);

                svm->notifier.ops = &intel_mmuops;
                ret = mmu_notifier_register(&svm->notifier, mm);
                if (ret) {
                        kfree(svm);
                        return ret;
                }

                ret = pasid_private_add(svm->pasid, svm);
                if (ret) {
                        mmu_notifier_unregister(&svm->notifier, mm);
                        kfree(svm);
                        return ret;
                }
        }

        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev) {
                ret = -ENOMEM;
                goto free_svm;
        }

        sdev->dev = dev;
        sdev->iommu = iommu;
        sdev->did = FLPT_DEFAULT_DID;
        sdev->sid = PCI_DEVID(info->bus, info->devfn);
        if (info->ats_enabled) {
                sdev->qdep = info->ats_qdep;
                if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
                        sdev->qdep = 0;
        }

        /* Setup the pasid table: */
        sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
        ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
                                            FLPT_DEFAULT_DID, sflags);
        if (ret)
                goto free_sdev;

        list_add_rcu(&sdev->list, &svm->devs);

        return 0;

free_sdev:
        kfree(sdev);
free_svm:
        if (list_empty(&svm->devs)) {
                mmu_notifier_unregister(&svm->notifier, mm);
                pasid_private_remove(pasid);
                kfree(svm);
        }

        return ret;
}

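/*
 * Undo intel_svm_set_dev_pasid(): unlink @dev from the PASID's device list
 * and, when the last device goes away, unregister the notifier and free the
 * intel_svm.
 */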
void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
{
        struct intel_svm_dev *sdev;
        struct intel_svm *svm;
        struct mm_struct *mm;

        if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
                return;
        mm = svm->mm;

        if (sdev) {
                list_del_rcu(&sdev->list);
                kfree_rcu(sdev, rcu);

                if (list_empty(&svm->devs)) {
                        if (svm->notifier.ops)
                                mmu_notifier_unregister(&svm->notifier, mm);
                        pasid_private_remove(svm->pasid);
                        kfree(svm);
                }
        }
}

/* Page request queue descriptor */
struct page_req_dsc {
        union {
                struct {
                        u64 type:8;
                        u64 pasid_present:1;
                        u64 priv_data_present:1;
                        u64 rsvd:6;
                        u64 rid:16;
                        u64 pasid:20;
                        u64 exe_req:1;
                        u64 pm_req:1;
                        u64 rsvd2:10;
                };
                u64 qw_0;
        };
        union {
                struct {
                        u64 rd_req:1;
                        u64 wr_req:1;
                        u64 lpig:1;
                        u64 prg_index:9;
                        u64 addr:52;
                };
                u64 qw_1;
        };
        u64 priv_data[2];
};

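/*
 * A virtual address is canonical when its upper bits are a sign extension of
 * bit __VIRTUAL_MASK_SHIFT, which is what the shift-and-compare below checks.
 */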
static bool is_canonical_address(u64 addr)
{
        int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
        long saddr = (long) addr;

        return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_drain_pasid_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. It then follows the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
void intel_drain_pasid_prq(struct device *dev, u32 pasid)
{
        struct device_domain_info *info;
        struct dmar_domain *domain;
        struct intel_iommu *iommu;
        struct qi_desc desc[3];
        struct pci_dev *pdev;
        int head, tail;
        u16 sid, did;
        int qdep;

        info = dev_iommu_priv_get(dev);
        if (WARN_ON(!info || !dev_is_pci(dev)))
                return;

        if (!info->pri_enabled)
                return;

        iommu = info->iommu;
        domain = info->domain;
        pdev = to_pci_dev(dev);
        sid = PCI_DEVID(info->bus, info->devfn);
        did = domain_id_iommu(domain, iommu);
        qdep = pci_ats_queue_depth(pdev);

        /*
         * Check and wait until all pending page requests in the queue are
         * handled by the prq handling thread.
         */
prq_retry:
        reinit_completion(&iommu->prq_complete);
        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        while (head != tail) {
                struct page_req_dsc *req;

                req = &iommu->prq[head / sizeof(*req)];
                if (!req->pasid_present || req->pasid != pasid) {
                        head = (head + sizeof(*req)) & PRQ_RING_MASK;
                        continue;
                }

                wait_for_completion(&iommu->prq_complete);
                goto prq_retry;
        }

        iopf_queue_flush_dev(dev);

        /*
         * Perform steps described in VT-d spec CH7.10 to drain page
         * requests and responses in hardware.
         */
        memset(desc, 0, sizeof(desc));
        desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_FENCE |
                        QI_IWD_TYPE;
        desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
                        QI_EIOTLB_DID(did) |
                        QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
                        QI_EIOTLB_TYPE;
        desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
                        QI_DEV_EIOTLB_SID(sid) |
                        QI_DEV_EIOTLB_QDEP(qdep) |
                        QI_DEIOTLB_TYPE |
                        QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
        reinit_completion(&iommu->prq_complete);
        qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                wait_for_completion(&iommu->prq_complete);
                goto qi_retry;
        }
}

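/* Translate the R/W/X/privileged bits of a page request into IOMMU_FAULT_PERM_* flags. */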
static int prq_to_iommu_prot(struct page_req_dsc *req)
{
        int prot = 0;

        if (req->rd_req)
                prot |= IOMMU_FAULT_PERM_READ;
        if (req->wr_req)
                prot |= IOMMU_FAULT_PERM_WRITE;
        if (req->exe_req)
                prot |= IOMMU_FAULT_PERM_EXEC;
        if (req->pm_req)
                prot |= IOMMU_FAULT_PERM_PRIV;

        return prot;
}

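/* Convert a page request descriptor into an iopf_fault and hand it to iommu_report_device_fault(). */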
static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
                                 struct page_req_dsc *desc)
{
        struct iopf_fault event = { };

        /* Fill in event data for device specific processing */
        event.fault.type = IOMMU_FAULT_PAGE_REQ;
        event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
        event.fault.prm.pasid = desc->pasid;
        event.fault.prm.grpid = desc->prg_index;
        event.fault.prm.perm = prq_to_iommu_prot(desc);

        if (desc->lpig)
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
        if (desc->pasid_present) {
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
        }
        if (desc->priv_data_present) {
                /*
                 * Set the last-page-in-group bit if private data is present;
                 * a page response is then required, just as it is for LPIG.
                 * iommu_report_device_fault() doesn't understand this vendor
                 * specific requirement, so we set last_page as a workaround.
                 */
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
                event.fault.prm.private_data[0] = desc->priv_data[0];
                event.fault.prm.private_data[1] = desc->priv_data[1];
        } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
                /*
                 * If the private data fields are not used by hardware, use
                 * them to monitor the page request handling latency.
                 */
                event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
        }

        iommu_report_device_fault(dev, &event);
}

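/*
 * Log an invalid page request and, when the descriptor demands a response
 * (LPIG or private data present), send a page group response with @result.
 */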
static void handle_bad_prq_event(struct intel_iommu *iommu,
                                 struct page_req_dsc *req, int result)
{
        struct qi_desc desc;

        pr_err("%s: Invalid page request: %08llx %08llx\n",
               iommu->name, ((unsigned long long *)req)[0],
               ((unsigned long long *)req)[1]);

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must
         * respond with page group response if private data
         * is present (PDP) or last page in group (LPIG) bit
         * is set. This is an additional VT-d feature beyond
         * PCI ATS spec.
         */
        if (!req->lpig && !req->priv_data_present)
                return;

        desc.qw0 = QI_PGRP_PASID(req->pasid) |
                        QI_PGRP_DID(req->rid) |
                        QI_PGRP_PASID_P(req->pasid_present) |
                        QI_PGRP_PDP(req->priv_data_present) |
                        QI_PGRP_RESP_CODE(result) |
                        QI_PGRP_RESP_TYPE;
        desc.qw1 = QI_PGRP_IDX(req->prg_index) |
                        QI_PGRP_LPIG(req->lpig);

        if (req->priv_data_present) {
                desc.qw2 = req->priv_data[0];
                desc.qw3 = req->priv_data[1];
        } else {
                desc.qw2 = 0;
                desc.qw3 = 0;
        }

        qi_submit_sync(iommu, &desc, 1, 0);
}

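/*
 * Threaded handler for the page request queue interrupt: walk the queue from
 * head to tail, sanity-check each descriptor, and forward valid requests to
 * the I/O page fault framework; invalid ones get an immediate failure response.
 */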
static irqreturn_t prq_event_thread(int irq, void *d)
{
        struct intel_iommu *iommu = d;
        struct page_req_dsc *req;
        int head, tail, handled;
        struct device *dev;
        u64 address;

        /*
         * Clear PPR bit before reading head/tail registers, to ensure that
         * we get a new interrupt if needed.
         */
        writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        handled = (head != tail);
        while (head != tail) {
                req = &iommu->prq[head / sizeof(*req)];
                address = (u64)req->addr << VTD_PAGE_SHIFT;

                if (unlikely(!req->pasid_present)) {
                        pr_err("IOMMU: %s: Page request without PASID\n",
                               iommu->name);
bad_req:
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                        goto prq_advance;
                }

                if (unlikely(!is_canonical_address(address))) {
                        pr_err("IOMMU: %s: Address is not canonical\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
                        pr_err("IOMMU: %s: Page request in Privilege Mode\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->exe_req && req->rd_req)) {
                        pr_err("IOMMU: %s: Execution request not supported\n",
                               iommu->name);
                        goto bad_req;
                }

                /* Drop Stop Marker message. No need for a response. */
                if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
                        goto prq_advance;

                /*
                 * If the prq is to be handled outside the iommu driver, i.e.
                 * by the receiver of the fault notification, skip the page
                 * response here.
                 */
                mutex_lock(&iommu->iopf_lock);
                dev = device_rbtree_find(iommu, req->rid);
                if (!dev) {
                        mutex_unlock(&iommu->iopf_lock);
                        goto bad_req;
                }

                intel_svm_prq_report(iommu, dev, req);
                trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
                                 req->priv_data[0], req->priv_data[1],
                                 iommu->prq_seq_number++);
                mutex_unlock(&iommu->iopf_lock);
prq_advance:
                head = (head + sizeof(*req)) & PRQ_RING_MASK;
        }

        dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

        /*
         * Clear the page request overflow bit and wake up all threads that
         * are waiting for the completion of this handling.
         */
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
                                    iommu->name);
                head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
                tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
                if (head == tail) {
                        iopf_queue_discard_partial(iommu->iopf_queue);
                        writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
                        pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
                                            iommu->name);
                }
        }

        if (!completion_done(&iommu->prq_complete))
                complete(&iommu->prq_complete);

        return IRQ_RETVAL(handled);
}

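/*
 * Send the page group response for a previously reported page request, as
 * VT-d requires when the request carried LPIG or private data.
 */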
void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
                             struct iommu_page_response *msg)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;
        u8 bus = info->bus, devfn = info->devfn;
        struct iommu_fault_page_request *prm;
        bool private_present;
        bool pasid_present;
        bool last_page;
        u16 sid;

        prm = &evt->fault.prm;
        sid = PCI_DEVID(bus, devfn);
        pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
        private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
        last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must respond
         * with page group response if private data is present (PDP)
         * or last page in group (LPIG) bit is set. This is an
         * additional VT-d requirement beyond PCI ATS spec.
         */
        if (last_page || private_present) {
                struct qi_desc desc;

                desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
                                QI_PGRP_PASID_P(pasid_present) |
                                QI_PGRP_PDP(private_present) |
                                QI_PGRP_RESP_CODE(msg->code) |
                                QI_PGRP_RESP_TYPE;
                desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
                desc.qw2 = 0;
                desc.qw3 = 0;

                if (private_present) {
                        desc.qw2 = prm->private_data[0];
                        desc.qw3 = prm->private_data[1];
                } else if (prm->private_data[0]) {
                        dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
                                ktime_to_ns(ktime_get()) - prm->private_data[0]);
                }

                qi_submit_sync(iommu, &desc, 1, 0);
        }
}

static void intel_svm_domain_free(struct iommu_domain *domain)
{
        kfree(to_dmar_domain(domain));
}

static const struct iommu_domain_ops intel_svm_domain_ops = {
        .set_dev_pasid          = intel_svm_set_dev_pasid,
        .free                   = intel_svm_domain_free
};

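/* Allocate a dmar_domain used as an SVA (shared virtual addressing) domain. */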
struct iommu_domain *intel_svm_domain_alloc(void)
{
        struct dmar_domain *domain;

        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;
        domain->domain.ops = &intel_svm_domain_ops;

        return &domain->domain;
}