GNU Linux-libre 5.10.217-gnu1
arch/powerpc/kvm/book3s_xive_native.c (releases.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2017-2019, IBM Corporation.
4  */
5
6 #define pr_fmt(fmt) "xive-kvm: " fmt
7
8 #include <linux/kernel.h>
9 #include <linux/kvm_host.h>
10 #include <linux/err.h>
11 #include <linux/gfp.h>
12 #include <linux/spinlock.h>
13 #include <linux/delay.h>
14 #include <linux/file.h>
15 #include <asm/uaccess.h>
16 #include <asm/kvm_book3s.h>
17 #include <asm/kvm_ppc.h>
18 #include <asm/hvcall.h>
19 #include <asm/xive.h>
20 #include <asm/xive-regs.h>
21 #include <asm/debug.h>
22 #include <asm/debugfs.h>
23 #include <asm/opal.h>
24
25 #include <linux/debugfs.h>
26 #include <linux/seq_file.h>
27
28 #include "book3s_xive.h"
29
30 static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
31 {
32         u64 val;
33
34         /*
35          * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
36          * load operation, so there is no need to enforce load-after-store
37          * ordering.
38          */
39
40         if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
41                 offset |= offset << 4;
42
43         val = in_be64(xd->eoi_mmio + offset);
44         return (u8)val;
45 }
46
47 static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
48 {
49         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
50         struct xive_q *q = &xc->queues[prio];
51
52         xive_native_disable_queue(xc->vp_id, q, prio);
53         if (q->qpage) {
54                 put_page(virt_to_page(q->qpage));
55                 q->qpage = NULL;
56         }
57 }
58
59 static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
60                                               u8 prio, __be32 *qpage,
61                                               u32 order, bool can_escalate)
62 {
63         int rc;
64         __be32 *qpage_prev = q->qpage;
65
66         rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
67                                          can_escalate);
68         if (rc)
69                 return rc;
70
71         if (qpage_prev)
72                 put_page(virt_to_page(qpage_prev));
73
74         return rc;
75 }
76
77 void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
78 {
79         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
80         int i;
81
82         if (!kvmppc_xive_enabled(vcpu))
83                 return;
84
85         if (!xc)
86                 return;
87
88         pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
89
90         /* Ensure no interrupt is still routed to that VP */
91         xc->valid = false;
92         kvmppc_xive_disable_vcpu_interrupts(vcpu);
93
94         /* Free escalations */
95         for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
96                 /* Free the escalation irq */
97                 if (xc->esc_virq[i]) {
98                         if (xc->xive->single_escalation)
99                                 xive_cleanup_single_escalation(vcpu, xc,
100                                                         xc->esc_virq[i]);
101                         free_irq(xc->esc_virq[i], vcpu);
102                         irq_dispose_mapping(xc->esc_virq[i]);
103                         kfree(xc->esc_virq_names[i]);
104                         xc->esc_virq[i] = 0;
105                 }
106         }
107
108         /* Disable the VP */
109         xive_native_disable_vp(xc->vp_id);
110
111         /* Clear the cam word so guest entry won't try to push context */
112         vcpu->arch.xive_cam_word = 0;
113
114         /* Free the queues */
115         for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
116                 kvmppc_xive_native_cleanup_queue(vcpu, i);
117         }
118
119         /* Free the VP */
120         kfree(xc);
121
122         /* Cleanup the vcpu */
123         vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
124         vcpu->arch.xive_vcpu = NULL;
125 }
126
127 int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
128                                     struct kvm_vcpu *vcpu, u32 server_num)
129 {
130         struct kvmppc_xive *xive = dev->private;
131         struct kvmppc_xive_vcpu *xc = NULL;
132         int rc;
133         u32 vp_id;
134
135         pr_devel("native_connect_vcpu(server=%d)\n", server_num);
136
137         if (dev->ops != &kvm_xive_native_ops) {
138                 pr_devel("Wrong ops !\n");
139                 return -EPERM;
140         }
141         if (xive->kvm != vcpu->kvm)
142                 return -EPERM;
143         if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
144                 return -EBUSY;
145
146         mutex_lock(&xive->lock);
147
148         rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
149         if (rc)
150                 goto bail;
151
152         xc = kzalloc(sizeof(*xc), GFP_KERNEL);
153         if (!xc) {
154                 rc = -ENOMEM;
155                 goto bail;
156         }
157
158         vcpu->arch.xive_vcpu = xc;
159         xc->xive = xive;
160         xc->vcpu = vcpu;
161         xc->server_num = server_num;
162
163         xc->vp_id = vp_id;
164         xc->valid = true;
165         vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
166
167         rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
168         if (rc) {
169                 pr_err("Failed to get VP info from OPAL: %d\n", rc);
170                 goto bail;
171         }
172
173         /*
174          * Enable the VP first as the single escalation mode will
175          * affect the numbering of the escalation interrupts
176          */
177         rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
178         if (rc) {
179                 pr_err("Failed to enable VP in OPAL: %d\n", rc);
180                 goto bail;
181         }
182
183         /* Configure VCPU fields for use by assembly push/pull */
184         vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
185         vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
186
187         /* TODO: reset all queues to a clean state ? */
188 bail:
189         mutex_unlock(&xive->lock);
190         if (rc)
191                 kvmppc_xive_native_cleanup_vcpu(vcpu);
192
193         return rc;
194 }
195
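/*
 * A hedged sketch of the userspace side of this connection (the fd
 * variables are placeholders, not names from this file): the vCPU is
 * attached to the XIVE native device with the KVM_ENABLE_CAP vcpu
 * ioctl, where args[0] is the device fd returned by KVM_CREATE_DEVICE
 * and args[1] is the server number.
 *
 *	struct kvm_enable_cap cap = {
 *		.cap     = KVM_CAP_PPC_IRQ_XIVE,
 *		.args[0] = xive_dev_fd,
 *		.args[1] = server_num,
 *	};
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */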
196 /*
197  * Device passthrough support
198  */
199 static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
200 {
201         struct kvmppc_xive *xive = kvm->arch.xive;
202         pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;
203
204         if (irq >= KVMPPC_XIVE_NR_IRQS)
205                 return -EINVAL;
206
207         /*
208          * Clear the ESB pages of the IRQ number being mapped (or
209          * unmapped) into the guest and let the VM fault handler
210          * repopulate with the appropriate ESB pages (device or IC)
211          */
212         pr_debug("clearing esb pages for girq 0x%lx\n", irq);
213         mutex_lock(&xive->mapping_lock);
214         if (xive->mapping)
215                 unmap_mapping_range(xive->mapping,
216                                     esb_pgoff << PAGE_SHIFT,
217                                     2ull << PAGE_SHIFT, 1);
218         mutex_unlock(&xive->mapping_lock);
219         return 0;
220 }
221
222 static struct kvmppc_xive_ops kvmppc_xive_native_ops =  {
223         .reset_mapped = kvmppc_xive_native_reset_mapped,
224 };
225
226 static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
227 {
228         struct vm_area_struct *vma = vmf->vma;
229         struct kvm_device *dev = vma->vm_file->private_data;
230         struct kvmppc_xive *xive = dev->private;
231         struct kvmppc_xive_src_block *sb;
232         struct kvmppc_xive_irq_state *state;
233         struct xive_irq_data *xd;
234         u32 hw_num;
235         u16 src;
236         u64 page;
237         unsigned long irq;
238         u64 page_offset;
239
240         /*
241          * Linux/KVM uses a two-page ESB setting, one page for trigger
242          * and one for EOI
243          */
244         page_offset = vmf->pgoff - vma->vm_pgoff;
245         irq = page_offset / 2;
246
247         sb = kvmppc_xive_find_source(xive, irq, &src);
248         if (!sb) {
249                 pr_devel("%s: source %lx not found !\n", __func__, irq);
250                 return VM_FAULT_SIGBUS;
251         }
252
253         state = &sb->irq_state[src];
254
255         /* Some sanity checking */
256         if (!state->valid) {
257                 pr_devel("%s: source %lx invalid !\n", __func__, irq);
258                 return VM_FAULT_SIGBUS;
259         }
260
261         kvmppc_xive_select_irq(state, &hw_num, &xd);
262
263         arch_spin_lock(&sb->lock);
264
265         /*
266          * first/even page is for trigger
267          * second/odd page is for EOI and management.
268          */
269         page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
270         arch_spin_unlock(&sb->lock);
271
272         if (WARN_ON(!page)) {
273                 pr_err("%s: accessing invalid ESB page for source %lx !\n",
274                        __func__, irq);
275                 return VM_FAULT_SIGBUS;
276         }
277
278         vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
279         return VM_FAULT_NOPAGE;
280 }
281
282 static const struct vm_operations_struct xive_native_esb_vmops = {
283         .fault = xive_native_esb_fault,
284 };
285
286 static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
287 {
288         struct vm_area_struct *vma = vmf->vma;
289
290         switch (vmf->pgoff - vma->vm_pgoff) {
291         case 0: /* HW - forbid access */
292         case 1: /* HV - forbid access */
293                 return VM_FAULT_SIGBUS;
294         case 2: /* OS */
295                 vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
296                 return VM_FAULT_NOPAGE;
297         case 3: /* USER - TODO */
298         default:
299                 return VM_FAULT_SIGBUS;
300         }
301 }
302
303 static const struct vm_operations_struct xive_native_tima_vmops = {
304         .fault = xive_native_tima_fault,
305 };
306
307 static int kvmppc_xive_native_mmap(struct kvm_device *dev,
308                                    struct vm_area_struct *vma)
309 {
310         struct kvmppc_xive *xive = dev->private;
311
312         /* We only allow mappings at fixed offset for now */
313         if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
314                 if (vma_pages(vma) > 4)
315                         return -EINVAL;
316                 vma->vm_ops = &xive_native_tima_vmops;
317         } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
318                 if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
319                         return -EINVAL;
320                 vma->vm_ops = &xive_native_esb_vmops;
321         } else {
322                 return -EINVAL;
323         }
324
325         vma->vm_flags |= VM_IO | VM_PFNMAP;
326         vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
327
328         /*
329          * Grab the KVM device file address_space to be able to clear
330          * the ESB pages mapping when a device is passed-through into
331          * the guest.
332          */
333         xive->mapping = vma->vm_file->f_mapping;
334         return 0;
335 }
336
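/*
 * A hedged sketch of how a VMM is expected to use the fixed offsets
 * checked above (xive_dev_fd, page_size and nr_irqs are placeholders):
 * the device fd is mmap()ed once for the four TIMA pages and once for
 * the ESB pages, and accesses then fault into the vm_ops installed in
 * kvmppc_xive_native_mmap().
 *
 *	tima = mmap(NULL, 4 * page_size, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, xive_dev_fd,
 *		    KVM_XIVE_TIMA_PAGE_OFFSET * page_size);
 *	esb  = mmap(NULL, nr_irqs * 2 * page_size,
 *		    PROT_READ | PROT_WRITE, MAP_SHARED, xive_dev_fd,
 *		    KVM_XIVE_ESB_PAGE_OFFSET * page_size);
 */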
337 static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
338                                          u64 addr)
339 {
340         struct kvmppc_xive_src_block *sb;
341         struct kvmppc_xive_irq_state *state;
342         u64 __user *ubufp = (u64 __user *) addr;
343         u64 val;
344         u16 idx;
345         int rc;
346
347         pr_devel("%s irq=0x%lx\n", __func__, irq);
348
349         if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
350                 return -E2BIG;
351
352         sb = kvmppc_xive_find_source(xive, irq, &idx);
353         if (!sb) {
354                 pr_debug("No source, creating source block...\n");
355                 sb = kvmppc_xive_create_src_block(xive, irq);
356                 if (!sb) {
357                         pr_err("Failed to create block...\n");
358                         return -ENOMEM;
359                 }
360         }
361         state = &sb->irq_state[idx];
362
363         if (get_user(val, ubufp)) {
364                 pr_err("fault getting user info !\n");
365                 return -EFAULT;
366         }
367
368         arch_spin_lock(&sb->lock);
369
370         /*
371          * If the source doesn't already have an IPI, allocate
372          * one and get the corresponding data
373          */
374         if (!state->ipi_number) {
375                 state->ipi_number = xive_native_alloc_irq();
376                 if (state->ipi_number == 0) {
377                         pr_err("Failed to allocate IRQ !\n");
378                         rc = -ENXIO;
379                         goto unlock;
380                 }
381                 xive_native_populate_irq_data(state->ipi_number,
382                                               &state->ipi_data);
383                 pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
384                          state->ipi_number, irq);
385         }
386
387         /* Restore LSI state */
388         if (val & KVM_XIVE_LEVEL_SENSITIVE) {
389                 state->lsi = true;
390                 if (val & KVM_XIVE_LEVEL_ASSERTED)
391                         state->asserted = true;
392                 pr_devel("  LSI ! Asserted=%d\n", state->asserted);
393         }
394
395         /* Mask IRQ to start with */
396         state->act_server = 0;
397         state->act_priority = MASKED;
398         xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
399         xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
400
401         /* Increment the number of valid sources and mark this one valid */
402         if (!state->valid)
403                 xive->src_count++;
404         state->valid = true;
405
406         rc = 0;
407
408 unlock:
409         arch_spin_unlock(&sb->lock);
410
411         return rc;
412 }
413
414 static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
415                                         struct kvmppc_xive_src_block *sb,
416                                         struct kvmppc_xive_irq_state *state,
417                                         u32 server, u8 priority, bool masked,
418                                         u32 eisn)
419 {
420         struct kvm *kvm = xive->kvm;
421         u32 hw_num;
422         int rc = 0;
423
424         arch_spin_lock(&sb->lock);
425
426         if (state->act_server == server && state->act_priority == priority &&
427             state->eisn == eisn)
428                 goto unlock;
429
430         pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
431                  priority, server, masked, state->act_server,
432                  state->act_priority);
433
434         kvmppc_xive_select_irq(state, &hw_num, NULL);
435
436         if (priority != MASKED && !masked) {
437                 rc = kvmppc_xive_select_target(kvm, &server, priority);
438                 if (rc)
439                         goto unlock;
440
441                 state->act_priority = priority;
442                 state->act_server = server;
443                 state->eisn = eisn;
444
445                 rc = xive_native_configure_irq(hw_num,
446                                                kvmppc_xive_vp(xive, server),
447                                                priority, eisn);
448         } else {
449                 state->act_priority = MASKED;
450                 state->act_server = 0;
451                 state->eisn = 0;
452
453                 rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
454         }
455
456 unlock:
457         arch_spin_unlock(&sb->lock);
458         return rc;
459 }
460
461 static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
462                                                 long irq, u64 addr)
463 {
464         struct kvmppc_xive_src_block *sb;
465         struct kvmppc_xive_irq_state *state;
466         u64 __user *ubufp = (u64 __user *) addr;
467         u16 src;
468         u64 kvm_cfg;
469         u32 server;
470         u8 priority;
471         bool masked;
472         u32 eisn;
473
474         sb = kvmppc_xive_find_source(xive, irq, &src);
475         if (!sb)
476                 return -ENOENT;
477
478         state = &sb->irq_state[src];
479
480         if (!state->valid)
481                 return -EINVAL;
482
483         if (get_user(kvm_cfg, ubufp))
484                 return -EFAULT;
485
486         pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);
487
488         priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
489                 KVM_XIVE_SOURCE_PRIORITY_SHIFT;
490         server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
491                 KVM_XIVE_SOURCE_SERVER_SHIFT;
492         masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
493                 KVM_XIVE_SOURCE_MASKED_SHIFT;
494         eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
495                 KVM_XIVE_SOURCE_EISN_SHIFT;
496
497         if (priority != xive_prio_from_guest(priority)) {
498                 pr_err("invalid priority for queue %d for VCPU %d\n",
499                        priority, server);
500                 return -EINVAL;
501         }
502
503         return kvmppc_xive_native_update_source_config(xive, sb, state, server,
504                                                        priority, masked, eisn);
505 }
506
507 static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
508                                           long irq, u64 addr)
509 {
510         struct kvmppc_xive_src_block *sb;
511         struct kvmppc_xive_irq_state *state;
512         struct xive_irq_data *xd;
513         u32 hw_num;
514         u16 src;
515         int rc = 0;
516
517         pr_devel("%s irq=0x%lx\n", __func__, irq);
518
519         sb = kvmppc_xive_find_source(xive, irq, &src);
520         if (!sb)
521                 return -ENOENT;
522
523         state = &sb->irq_state[src];
524
525         rc = -EINVAL;
526
527         arch_spin_lock(&sb->lock);
528
529         if (state->valid) {
530                 kvmppc_xive_select_irq(state, &hw_num, &xd);
531                 xive_native_sync_source(hw_num);
532                 rc = 0;
533         }
534
535         arch_spin_unlock(&sb->lock);
536         return rc;
537 }
538
539 static int xive_native_validate_queue_size(u32 qshift)
540 {
541         /*
542          * We only support 64K pages for the moment. This is also
543          * advertised in the DT property "ibm,xive-eq-sizes"
544          */
545         switch (qshift) {
546         case 0: /* EQ reset */
547         case 16:
548                 return 0;
549         case 12:
550         case 21:
551         case 24:
552         default:
553                 return -EINVAL;
554         }
555 }
556
557 static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
558                                                long eq_idx, u64 addr)
559 {
560         struct kvm *kvm = xive->kvm;
561         struct kvm_vcpu *vcpu;
562         struct kvmppc_xive_vcpu *xc;
563         void __user *ubufp = (void __user *) addr;
564         u32 server;
565         u8 priority;
566         struct kvm_ppc_xive_eq kvm_eq;
567         int rc;
568         __be32 *qaddr = NULL;
569         struct page *page;
570         struct xive_q *q;
571         gfn_t gfn;
572         unsigned long page_size;
573         int srcu_idx;
574
575         /*
576          * Demangle priority/server tuple from the EQ identifier
577          */
578         priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
579                 KVM_XIVE_EQ_PRIORITY_SHIFT;
580         server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
581                 KVM_XIVE_EQ_SERVER_SHIFT;
582
583         if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
584                 return -EFAULT;
585
586         vcpu = kvmppc_xive_find_server(kvm, server);
587         if (!vcpu) {
588                 pr_err("Can't find server %d\n", server);
589                 return -ENOENT;
590         }
591         xc = vcpu->arch.xive_vcpu;
592
593         if (priority != xive_prio_from_guest(priority)) {
594                 pr_err("Trying to restore invalid queue %d for VCPU %d\n",
595                        priority, server);
596                 return -EINVAL;
597         }
598         q = &xc->queues[priority];
599
600         pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
601                  __func__, server, priority, kvm_eq.flags,
602                  kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
603
604         /* reset queue and disable queueing */
605         if (!kvm_eq.qshift) {
606                 q->guest_qaddr  = 0;
607                 q->guest_qshift = 0;
608
609                 rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
610                                                         NULL, 0, true);
611                 if (rc) {
612                         pr_err("Failed to reset queue %d for VCPU %d: %d\n",
613                                priority, xc->server_num, rc);
614                         return rc;
615                 }
616
617                 return 0;
618         }
619
620         /*
621          * sPAPR specifies an "Unconditional Notify (n) flag" for the
622          * H_INT_SET_QUEUE_CONFIG hcall which forces notification
623          * without using the coalescing mechanisms provided by the
624          * XIVE END ESBs. This is required on KVM as notification
625          * using the END ESBs is not supported.
626          */
627         if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
628                 pr_err("invalid flags %d\n", kvm_eq.flags);
629                 return -EINVAL;
630         }
631
632         rc = xive_native_validate_queue_size(kvm_eq.qshift);
633         if (rc) {
634                 pr_err("invalid queue size %d\n", kvm_eq.qshift);
635                 return rc;
636         }
637
638         if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
639                 pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
640                        1ull << kvm_eq.qshift);
641                 return -EINVAL;
642         }
643
644         srcu_idx = srcu_read_lock(&kvm->srcu);
645         gfn = gpa_to_gfn(kvm_eq.qaddr);
646
647         page_size = kvm_host_page_size(vcpu, gfn);
648         if (1ull << kvm_eq.qshift > page_size) {
649                 srcu_read_unlock(&kvm->srcu, srcu_idx);
650                 pr_warn("Incompatible host page size %lx!\n", page_size);
651                 return -EINVAL;
652         }
653
654         page = gfn_to_page(kvm, gfn);
655         if (is_error_page(page)) {
656                 srcu_read_unlock(&kvm->srcu, srcu_idx);
657                 pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
658                 return -EINVAL;
659         }
660
661         qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
662         srcu_read_unlock(&kvm->srcu, srcu_idx);
663
664         /*
665          * Back up the queue page guest address so that the EQ page can
666          * be marked dirty for migration.
667          */
668         q->guest_qaddr  = kvm_eq.qaddr;
669         q->guest_qshift = kvm_eq.qshift;
670
671          /*
672           * Unconditional Notification is forced by default at the
673           * OPAL level because the use of END ESBs is not supported by
674           * Linux.
675           */
676         rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
677                                         (__be32 *) qaddr, kvm_eq.qshift, true);
678         if (rc) {
679                 pr_err("Failed to configure queue %d for VCPU %d: %d\n",
680                        priority, xc->server_num, rc);
681                 put_page(page);
682                 return rc;
683         }
684
685         /*
686          * Only restore the queue state when needed. When doing the
687          * H_INT_SET_SOURCE_CONFIG hcall, it should not.
688          */
689         if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
690                 rc = xive_native_set_queue_state(xc->vp_id, priority,
691                                                  kvm_eq.qtoggle,
692                                                  kvm_eq.qindex);
693                 if (rc)
694                         goto error;
695         }
696
697         rc = kvmppc_xive_attach_escalation(vcpu, priority,
698                                            xive->single_escalation);
699 error:
700         if (rc)
701                 kvmppc_xive_native_cleanup_queue(vcpu, priority);
702         return rc;
703 }
704
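/*
 * A hedged sketch of the matching userspace call (xive_dev_fd is a
 * placeholder): the EQ identifier packs the server and priority with
 * the uapi shift macros demangled above, and the payload pointed to
 * by addr is a struct kvm_ppc_xive_eq.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_EQ_CONFIG,
 *		.attr  = (server << KVM_XIVE_EQ_SERVER_SHIFT) |
 *			 (priority << KVM_XIVE_EQ_PRIORITY_SHIFT),
 *		.addr  = (__u64)(uintptr_t)&kvm_eq,
 *	};
 *	ioctl(xive_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
 */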
705 static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
706                                                long eq_idx, u64 addr)
707 {
708         struct kvm *kvm = xive->kvm;
709         struct kvm_vcpu *vcpu;
710         struct kvmppc_xive_vcpu *xc;
711         struct xive_q *q;
712         void __user *ubufp = (u64 __user *) addr;
713         u32 server;
714         u8 priority;
715         struct kvm_ppc_xive_eq kvm_eq;
716         u64 qaddr;
717         u64 qshift;
718         u64 qeoi_page;
719         u32 escalate_irq;
720         u64 qflags;
721         int rc;
722
723         /*
724          * Demangle priority/server tuple from the EQ identifier
725          */
726         priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
727                 KVM_XIVE_EQ_PRIORITY_SHIFT;
728         server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
729                 KVM_XIVE_EQ_SERVER_SHIFT;
730
731         vcpu = kvmppc_xive_find_server(kvm, server);
732         if (!vcpu) {
733                 pr_err("Can't find server %d\n", server);
734                 return -ENOENT;
735         }
736         xc = vcpu->arch.xive_vcpu;
737
738         if (priority != xive_prio_from_guest(priority)) {
739                 pr_err("invalid priority for queue %d for VCPU %d\n",
740                        priority, server);
741                 return -EINVAL;
742         }
743         q = &xc->queues[priority];
744
745         memset(&kvm_eq, 0, sizeof(kvm_eq));
746
747         if (!q->qpage)
748                 return 0;
749
750         rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
751                                         &qeoi_page, &escalate_irq, &qflags);
752         if (rc)
753                 return rc;
754
755         kvm_eq.flags = 0;
756         if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
757                 kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
758
759         kvm_eq.qshift = q->guest_qshift;
760         kvm_eq.qaddr  = q->guest_qaddr;
761
762         rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
763                                          &kvm_eq.qindex);
764         if (rc)
765                 return rc;
766
767         pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
768                  __func__, server, priority, kvm_eq.flags,
769                  kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
770
771         if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
772                 return -EFAULT;
773
774         return 0;
775 }
776
777 static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
778 {
779         int i;
780
781         for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
782                 struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
783
784                 if (!state->valid)
785                         continue;
786
787                 if (state->act_priority == MASKED)
788                         continue;
789
790                 state->eisn = 0;
791                 state->act_server = 0;
792                 state->act_priority = MASKED;
793                 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
794                 xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
795                 if (state->pt_number) {
796                         xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
797                         xive_native_configure_irq(state->pt_number,
798                                                   0, MASKED, 0);
799                 }
800         }
801 }
802
803 static int kvmppc_xive_reset(struct kvmppc_xive *xive)
804 {
805         struct kvm *kvm = xive->kvm;
806         struct kvm_vcpu *vcpu;
807         unsigned int i;
808
809         pr_devel("%s\n", __func__);
810
811         mutex_lock(&xive->lock);
812
813         kvm_for_each_vcpu(i, vcpu, kvm) {
814                 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
815                 unsigned int prio;
816
817                 if (!xc)
818                         continue;
819
820                 kvmppc_xive_disable_vcpu_interrupts(vcpu);
821
822                 for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
823
824                         /* Single escalation, no queue 7 */
825                         if (prio == 7 && xive->single_escalation)
826                                 break;
827
828                         if (xc->esc_virq[prio]) {
829                                 free_irq(xc->esc_virq[prio], vcpu);
830                                 irq_dispose_mapping(xc->esc_virq[prio]);
831                                 kfree(xc->esc_virq_names[prio]);
832                                 xc->esc_virq[prio] = 0;
833                         }
834
835                         kvmppc_xive_native_cleanup_queue(vcpu, prio);
836                 }
837         }
838
839         for (i = 0; i <= xive->max_sbid; i++) {
840                 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
841
842                 if (sb) {
843                         arch_spin_lock(&sb->lock);
844                         kvmppc_xive_reset_sources(sb);
845                         arch_spin_unlock(&sb->lock);
846                 }
847         }
848
849         mutex_unlock(&xive->lock);
850
851         return 0;
852 }
853
854 static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
855 {
856         int j;
857
858         for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
859                 struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
860                 struct xive_irq_data *xd;
861                 u32 hw_num;
862
863                 if (!state->valid)
864                         continue;
865
866                 /*
867                  * The struct kvmppc_xive_irq_state reflects the state
868                  * of the EAS configuration and not the state of the
869                  * source. The source is masked by setting the PQ bits to
870                  * '-Q', which is what is being done before calling
871                  * the KVM_DEV_XIVE_EQ_SYNC control.
872                  *
873                  * If a source EAS is configured, OPAL syncs the XIVE
874                  * IC of the source and the XIVE IC of the previous
875                  * target if any.
876                  *
877                  * So it should be fine ignoring MASKED sources as
878                  * they have been synced already.
879                  */
880                 if (state->act_priority == MASKED)
881                         continue;
882
883                 kvmppc_xive_select_irq(state, &hw_num, &xd);
884                 xive_native_sync_source(hw_num);
885                 xive_native_sync_queue(hw_num);
886         }
887 }
888
889 static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
890 {
891         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
892         unsigned int prio;
893         int srcu_idx;
894
895         if (!xc)
896                 return -ENOENT;
897
898         for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
899                 struct xive_q *q = &xc->queues[prio];
900
901                 if (!q->qpage)
902                         continue;
903
904                 /* Mark EQ page dirty for migration */
905                 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
906                 mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
907                 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
908         }
909         return 0;
910 }
911
912 static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
913 {
914         struct kvm *kvm = xive->kvm;
915         struct kvm_vcpu *vcpu;
916         unsigned int i;
917
918         pr_devel("%s\n", __func__);
919
920         mutex_lock(&xive->lock);
921         for (i = 0; i <= xive->max_sbid; i++) {
922                 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
923
924                 if (sb) {
925                         arch_spin_lock(&sb->lock);
926                         kvmppc_xive_native_sync_sources(sb);
927                         arch_spin_unlock(&sb->lock);
928                 }
929         }
930
931         kvm_for_each_vcpu(i, vcpu, kvm) {
932                 kvmppc_xive_native_vcpu_eq_sync(vcpu);
933         }
934         mutex_unlock(&xive->lock);
935
936         return 0;
937 }
938
939 static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
940                                        struct kvm_device_attr *attr)
941 {
942         struct kvmppc_xive *xive = dev->private;
943
944         switch (attr->group) {
945         case KVM_DEV_XIVE_GRP_CTRL:
946                 switch (attr->attr) {
947                 case KVM_DEV_XIVE_RESET:
948                         return kvmppc_xive_reset(xive);
949                 case KVM_DEV_XIVE_EQ_SYNC:
950                         return kvmppc_xive_native_eq_sync(xive);
951                 case KVM_DEV_XIVE_NR_SERVERS:
952                         return kvmppc_xive_set_nr_servers(xive, attr->addr);
953                 }
954                 break;
955         case KVM_DEV_XIVE_GRP_SOURCE:
956                 return kvmppc_xive_native_set_source(xive, attr->attr,
957                                                      attr->addr);
958         case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
959                 return kvmppc_xive_native_set_source_config(xive, attr->attr,
960                                                             attr->addr);
961         case KVM_DEV_XIVE_GRP_EQ_CONFIG:
962                 return kvmppc_xive_native_set_queue_config(xive, attr->attr,
963                                                            attr->addr);
964         case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
965                 return kvmppc_xive_native_sync_source(xive, attr->attr,
966                                                       attr->addr);
967         }
968         return -ENXIO;
969 }
970
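/*
 * A hedged example of driving the control group from userspace
 * (xive_dev_fd is a placeholder), here requesting a full device
 * reset:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_CTRL,
 *		.attr  = KVM_DEV_XIVE_RESET,
 *	};
 *	ioctl(xive_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
 */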
971 static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
972                                        struct kvm_device_attr *attr)
973 {
974         struct kvmppc_xive *xive = dev->private;
975
976         switch (attr->group) {
977         case KVM_DEV_XIVE_GRP_EQ_CONFIG:
978                 return kvmppc_xive_native_get_queue_config(xive, attr->attr,
979                                                            attr->addr);
980         }
981         return -ENXIO;
982 }
983
984 static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
985                                        struct kvm_device_attr *attr)
986 {
987         switch (attr->group) {
988         case KVM_DEV_XIVE_GRP_CTRL:
989                 switch (attr->attr) {
990                 case KVM_DEV_XIVE_RESET:
991                 case KVM_DEV_XIVE_EQ_SYNC:
992                 case KVM_DEV_XIVE_NR_SERVERS:
993                         return 0;
994                 }
995                 break;
996         case KVM_DEV_XIVE_GRP_SOURCE:
997         case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
998         case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
999                 if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
1000                     attr->attr < KVMPPC_XIVE_NR_IRQS)
1001                         return 0;
1002                 break;
1003         case KVM_DEV_XIVE_GRP_EQ_CONFIG:
1004                 return 0;
1005         }
1006         return -ENXIO;
1007 }
1008
1009 /*
1010  * Called when device fd is closed.  kvm->lock is held.
1011  */
1012 static void kvmppc_xive_native_release(struct kvm_device *dev)
1013 {
1014         struct kvmppc_xive *xive = dev->private;
1015         struct kvm *kvm = xive->kvm;
1016         struct kvm_vcpu *vcpu;
1017         int i;
1018
1019         pr_devel("Releasing xive native device\n");
1020
1021         /*
1022          * Clear the KVM device file address_space which is used to
1023          * unmap the ESB pages when a device is passed-through.
1024          */
1025         mutex_lock(&xive->mapping_lock);
1026         xive->mapping = NULL;
1027         mutex_unlock(&xive->mapping_lock);
1028
1029         /*
1030          * Since this is the device release function, we know that
1031          * userspace does not have any open fd or mmap referring to
1032          * the device.  Therefore none of the device attribute
1033          * set/get, mmap, or page fault functions can be
1034          * executing concurrently, and likewise the
1035          * connect_vcpu and set/clr_mapped functions cannot
1036          * be running either.
1037          */
1038
1039         debugfs_remove(xive->dentry);
1040
1041         /*
1042          * We should clean up the vCPU interrupt presenters first.
1043          */
1044         kvm_for_each_vcpu(i, vcpu, kvm) {
1045                 /*
1046                  * Take vcpu->mutex to ensure that no one_reg get/set ioctl
1047                  * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
1048                  * Holding the vcpu->mutex also means that the vcpu cannot
1049                  * be executing the KVM_RUN ioctl, and therefore it cannot
1050                  * be executing the XIVE push or pull code or accessing
1051                  * the XIVE MMIO regions.
1052                  */
1053                 mutex_lock(&vcpu->mutex);
1054                 kvmppc_xive_native_cleanup_vcpu(vcpu);
1055                 mutex_unlock(&vcpu->mutex);
1056         }
1057
1058         /*
1059          * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
1060          * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
1061          * against xive code getting called during vcpu execution or
1062          * set/get one_reg operations.
1063          */
1064         kvm->arch.xive = NULL;
1065
1066         for (i = 0; i <= xive->max_sbid; i++) {
1067                 if (xive->src_blocks[i])
1068                         kvmppc_xive_free_sources(xive->src_blocks[i]);
1069                 kfree(xive->src_blocks[i]);
1070                 xive->src_blocks[i] = NULL;
1071         }
1072
1073         if (xive->vp_base != XIVE_INVALID_VP)
1074                 xive_native_free_vp_block(xive->vp_base);
1075
1076         /*
1077          * A reference to the kvmppc_xive pointer is kept under the
1078          * xive_devices struct of the machine for reuse. For now it is
1079          * freed when the VM is destroyed, until all the execution
1080          * paths are fixed.
1081          */
1082
1083         kfree(dev);
1084 }
1085
1086 /*
1087  * Create a XIVE device.  kvm->lock is held.
1088  */
1089 static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
1090 {
1091         struct kvmppc_xive *xive;
1092         struct kvm *kvm = dev->kvm;
1093
1094         pr_devel("Creating xive native device\n");
1095
1096         if (kvm->arch.xive)
1097                 return -EEXIST;
1098
1099         xive = kvmppc_xive_get_device(kvm, type);
1100         if (!xive)
1101                 return -ENOMEM;
1102
1103         dev->private = xive;
1104         xive->dev = dev;
1105         xive->kvm = kvm;
1106         mutex_init(&xive->mapping_lock);
1107         mutex_init(&xive->lock);
1108
1109         /* VP allocation is delayed to the first call to connect_vcpu */
1110         xive->vp_base = XIVE_INVALID_VP;
1111         /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
1112          * on a POWER9 system.
1113          */
1114         xive->nr_servers = KVM_MAX_VCPUS;
1115
1116         xive->single_escalation = xive_native_has_single_escalation();
1117         xive->ops = &kvmppc_xive_native_ops;
1118
1119         kvm->arch.xive = xive;
1120         return 0;
1121 }
1122
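/*
 * A hedged sketch of device creation from the VMM side (vm_fd and
 * xive_dev_fd are placeholders): the device is instantiated with
 * KVM_CREATE_DEVICE on the VM fd, and the returned fd is the one used
 * for the mmap and attribute calls above.
 *
 *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_XIVE };
 *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
 *	xive_dev_fd = cd.fd;
 */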
1123 /*
1124  * Interrupt Pending Buffer (IPB) offset
1125  */
1126 #define TM_IPB_SHIFT 40
1127 #define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)
1128
1129 int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
1130 {
1131         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1132         u64 opal_state;
1133         int rc;
1134
1135         if (!kvmppc_xive_enabled(vcpu))
1136                 return -EPERM;
1137
1138         if (!xc)
1139                 return -ENOENT;
1140
1141         /* Thread context registers. We only care about IPB and CPPR */
1142         val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
1143
1144         /* Get the VP state from OPAL */
1145         rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
1146         if (rc)
1147                 return rc;
1148
1149         /*
1150          * Capture the backup of the IPB register held in the NVT
1151          * structure and merge it into our KVM VP state.
1152          */
1153         val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
1154
1155         pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
1156                  __func__,
1157                  vcpu->arch.xive_saved_state.nsr,
1158                  vcpu->arch.xive_saved_state.cppr,
1159                  vcpu->arch.xive_saved_state.ipb,
1160                  vcpu->arch.xive_saved_state.pipr,
1161                  vcpu->arch.xive_saved_state.w01,
1162                  (u32) vcpu->arch.xive_cam_word, opal_state);
1163
1164         return 0;
1165 }
1166
1167 int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
1168 {
1169         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1170         struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
1171
1172         pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
1173                  val->xive_timaval[0], val->xive_timaval[1]);
1174
1175         if (!kvmppc_xive_enabled(vcpu))
1176                 return -EPERM;
1177
1178         if (!xc || !xive)
1179                 return -ENOENT;
1180
1181         /* We can't update the state of a "pushed" VCPU  */
1182         if (WARN_ON(vcpu->arch.xive_pushed))
1183                 return -EBUSY;
1184
1185         /*
1186          * Restore the thread context registers. IPB and CPPR should
1187          * be the only ones that matter.
1188          */
1189         vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
1190
1191         /*
1192          * There is no need to restore the XIVE internal state (IPB
1193          * stored in the NVT) as the IPB register was merged in KVM VP
1194          * state when captured.
1195          */
1196         return 0;
1197 }
1198
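/*
 * A hedged sketch of the one_reg access backing the two helpers above
 * (vcpu_fd and timaval are placeholders): the 128-bit VP state is
 * transferred with KVM_GET_ONE_REG / KVM_SET_ONE_REG on the vcpu fd
 * using the KVM_REG_PPC_VP_STATE register id.
 *
 *	__u64 timaval[2];
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_PPC_VP_STATE,
 *		.addr = (__u64)(uintptr_t)timaval,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */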
1199 bool kvmppc_xive_native_supported(void)
1200 {
1201         return xive_native_has_queue_state_support();
1202 }
1203
1204 static int xive_native_debug_show(struct seq_file *m, void *private)
1205 {
1206         struct kvmppc_xive *xive = m->private;
1207         struct kvm *kvm = xive->kvm;
1208         struct kvm_vcpu *vcpu;
1209         unsigned int i;
1210
1211         if (!kvm)
1212                 return 0;
1213
1214         seq_puts(m, "=========\nVCPU state\n=========\n");
1215
1216         kvm_for_each_vcpu(i, vcpu, kvm) {
1217                 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1218
1219                 if (!xc)
1220                         continue;
1221
1222                 seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
1223                            xc->server_num, xc->vp_id,
1224                            vcpu->arch.xive_saved_state.nsr,
1225                            vcpu->arch.xive_saved_state.cppr,
1226                            vcpu->arch.xive_saved_state.ipb,
1227                            vcpu->arch.xive_saved_state.pipr,
1228                            vcpu->arch.xive_saved_state.w01,
1229                            (u32) vcpu->arch.xive_cam_word);
1230
1231                 kvmppc_xive_debug_show_queues(m, vcpu);
1232         }
1233
1234         return 0;
1235 }
1236
1237 DEFINE_SHOW_ATTRIBUTE(xive_native_debug);
1238
1239 static void xive_native_debugfs_init(struct kvmppc_xive *xive)
1240 {
1241         char *name;
1242
1243         name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
1244         if (!name) {
1245                 pr_err("%s: no memory for name\n", __func__);
1246                 return;
1247         }
1248
1249         xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
1250                                            xive, &xive_native_debug_fops);
1251
1252         pr_debug("%s: created %s\n", __func__, name);
1253         kfree(name);
1254 }
1255
1256 static void kvmppc_xive_native_init(struct kvm_device *dev)
1257 {
1258         struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
1259
1260         /* Register some debug interfaces */
1261         xive_native_debugfs_init(xive);
1262 }
1263
1264 struct kvm_device_ops kvm_xive_native_ops = {
1265         .name = "kvm-xive-native",
1266         .create = kvmppc_xive_native_create,
1267         .init = kvmppc_xive_native_init,
1268         .release = kvmppc_xive_native_release,
1269         .set_attr = kvmppc_xive_native_set_attr,
1270         .get_attr = kvmppc_xive_native_get_attr,
1271         .has_attr = kvmppc_xive_native_has_attr,
1272         .mmap = kvmppc_xive_native_mmap,
1273 };
1274
1275 void kvmppc_xive_native_init_module(void)
1276 {
1277         ;
1278 }
1279
1280 void kvmppc_xive_native_exit_module(void)
1281 {
1282         ;
1283 }