arch/powerpc/kvm/book3s_xive_native.c (GNU Linux-libre 5.4.274-gnu1)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

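/*
 * Load from an interrupt source's ESB management page at the given
 * offset. The load returns the PQ state and, for the XIVE_ESB_SET_PQ_*
 * offsets, also updates it. On hardware flagged with
 * XIVE_IRQ_FLAG_SHIFT_BUG, the offset bits are replicated four bits
 * higher before the MMIO load to work around an erratum.
 */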
static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
        u64 val;

        if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
                offset |= offset << 4;

        val = in_be64(xd->eoi_mmio + offset);
        return (u8)val;
}

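/*
 * Disable the event queue of (vcpu, prio) at the XIVE IC level and
 * release the reference taken on the queue page when the queue was
 * configured.
 */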
static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct xive_q *q = &xc->queues[prio];

        xive_native_disable_queue(xc->vp_id, q, prio);
        if (q->qpage) {
                put_page(virt_to_page(q->qpage));
                q->qpage = NULL;
        }
}

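/*
 * Wrapper around xive_native_configure_queue() which drops the
 * reference on the previous queue page, if any, but only once the new
 * configuration has been accepted. On failure the caller still owns
 * the old page.
 */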
static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
                                              u8 prio, __be32 *qpage,
                                              u32 order, bool can_escalate)
{
        int rc;
        __be32 *qpage_prev = q->qpage;

        rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
                                         can_escalate);
        if (rc)
                return rc;

        if (qpage_prev)
                put_page(virt_to_page(qpage_prev));

        return rc;
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        int i;

        if (!kvmppc_xive_enabled(vcpu))
                return;

        if (!xc)
                return;

        pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

        /* Ensure no interrupt is still routed to that VP */
        xc->valid = false;
        kvmppc_xive_disable_vcpu_interrupts(vcpu);

        /* Free escalations */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                /* Free the escalation irq */
                if (xc->esc_virq[i]) {
                        if (xc->xive->single_escalation)
                                xive_cleanup_single_escalation(vcpu, xc,
                                                        xc->esc_virq[i]);
                        free_irq(xc->esc_virq[i], vcpu);
                        irq_dispose_mapping(xc->esc_virq[i]);
                        kfree(xc->esc_virq_names[i]);
                        xc->esc_virq[i] = 0;
                }
        }

        /* Disable the VP */
        xive_native_disable_vp(xc->vp_id);

        /* Clear the cam word so guest entry won't try to push context */
        vcpu->arch.xive_cam_word = 0;

        /* Free the queues */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++)
                kvmppc_xive_native_cleanup_queue(vcpu, i);

        /* Free the VP */
        kfree(xc);

        /* Cleanup the vcpu */
        vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
        vcpu->arch.xive_vcpu = NULL;
}

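/*
 * Connect a vCPU to the XIVE native device: allocate the presenter
 * structure, claim the XIVE VP derived from the server number, enable
 * the VP in OPAL and prime the fields used by the assembly context
 * push/pull code.
 */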
int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
                                    struct kvm_vcpu *vcpu, u32 server_num)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_vcpu *xc = NULL;
        int rc;
        u32 vp_id;

        pr_devel("native_connect_vcpu(server=%d)\n", server_num);

        if (dev->ops != &kvm_xive_native_ops) {
                pr_devel("Wrong ops !\n");
                return -EPERM;
        }
        if (xive->kvm != vcpu->kvm)
                return -EPERM;
        if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
                return -EBUSY;
        if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
                pr_devel("Out of bounds !\n");
                return -EINVAL;
        }

        mutex_lock(&xive->lock);

        vp_id = kvmppc_xive_vp(xive, server_num);
        if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
                pr_devel("Duplicate !\n");
                rc = -EEXIST;
                goto bail;
        }

        xc = kzalloc(sizeof(*xc), GFP_KERNEL);
        if (!xc) {
                rc = -ENOMEM;
                goto bail;
        }

        vcpu->arch.xive_vcpu = xc;
        xc->xive = xive;
        xc->vcpu = vcpu;
        xc->server_num = server_num;

        xc->vp_id = vp_id;
        xc->valid = true;
        vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

        rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
        if (rc) {
                pr_err("Failed to get VP info from OPAL: %d\n", rc);
                goto bail;
        }

        /*
         * Enable the VP first as the single escalation mode will
         * affect the numbering of the escalation interrupts
         */
        rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
        if (rc) {
                pr_err("Failed to enable VP in OPAL: %d\n", rc);
                goto bail;
        }

        /* Configure VCPU fields for use by assembly push/pull */
        vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
        vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

        /* TODO: reset all queues to a clean state ? */
bail:
        mutex_unlock(&xive->lock);
        if (rc)
                kvmppc_xive_native_cleanup_vcpu(vcpu);

        return rc;
}


/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
        struct kvmppc_xive *xive = kvm->arch.xive;
        pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

        if (irq >= KVMPPC_XIVE_NR_IRQS)
                return -EINVAL;

        /*
         * Clear the ESB pages of the IRQ number being mapped (or
         * unmapped) into the guest and let the VM fault handler
         * repopulate with the appropriate ESB pages (device or IC)
         */
        pr_debug("clearing esb pages for girq 0x%lx\n", irq);
        mutex_lock(&xive->mapping_lock);
        if (xive->mapping)
                unmap_mapping_range(xive->mapping,
                                    esb_pgoff << PAGE_SHIFT,
                                    2ull << PAGE_SHIFT, 1);
        mutex_unlock(&xive->mapping_lock);
        return 0;
}

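/*
 * These ops are consumed by the common XIVE KVM code in book3s_xive.c:
 * kvmppc_xive_set_mapped() and kvmppc_xive_clr_mapped() invoke
 * ->reset_mapped() when a passed-through device interrupt is switched
 * between the IC ESB pages and the device ESB pages.
 */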
static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
        .reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct kvm_device *dev = vma->vm_file->private_data;
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        u64 page;
        unsigned long irq;
        u64 page_offset;

        /*
         * Linux/KVM uses a two-page ESB setting, one page for trigger
         * and one for EOI
         */
        page_offset = vmf->pgoff - vma->vm_pgoff;
        irq = page_offset / 2;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb) {
                pr_devel("%s: source %lx not found !\n", __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        state = &sb->irq_state[src];

        /* Some sanity checking */
        if (!state->valid) {
                pr_devel("%s: source %lx invalid !\n", __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        kvmppc_xive_select_irq(state, &hw_num, &xd);

        arch_spin_lock(&sb->lock);

        /*
         * first/even page is for trigger
         * second/odd page is for EOI and management.
         */
        page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
        arch_spin_unlock(&sb->lock);

        if (WARN_ON(!page)) {
                pr_err("%s: accessing invalid ESB page for source %lx !\n",
                       __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
        return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
        .fault = xive_native_esb_fault,
};

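/*
 * The TIMA is exposed as four contiguous pages giving the HW, HV, OS
 * and USER views of the thread interrupt context. Only the OS view
 * may be mapped by userspace; faults on the other views raise SIGBUS.
 */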
static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;

        switch (vmf->pgoff - vma->vm_pgoff) {
        case 0: /* HW - forbid access */
        case 1: /* HV - forbid access */
                return VM_FAULT_SIGBUS;
        case 2: /* OS */
                vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
                return VM_FAULT_NOPAGE;
        case 3: /* USER - TODO */
        default:
                return VM_FAULT_SIGBUS;
        }
}

static const struct vm_operations_struct xive_native_tima_vmops = {
        .fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
                                   struct vm_area_struct *vma)
{
        struct kvmppc_xive *xive = dev->private;

        /* We only allow mappings at fixed offset for now */
        if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
                if (vma_pages(vma) > 4)
                        return -EINVAL;
                vma->vm_ops = &xive_native_tima_vmops;
        } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
                if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
                        return -EINVAL;
                vma->vm_ops = &xive_native_esb_vmops;
        } else {
                return -EINVAL;
        }

        vma->vm_flags |= VM_IO | VM_PFNMAP;
        vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

        /*
         * Grab the KVM device file address_space to be able to clear
         * the ESB pages mapping when a device is passed-through into
         * the guest.
         */
        xive->mapping = vma->vm_file->f_mapping;
        return 0;
}
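
/*
 * A minimal userspace sketch of mapping both regions, assuming "fd" is
 * the file descriptor returned by KVM_CREATE_DEVICE for the XIVE
 * native device, "psize" is the host page size and "nr_irqs" the
 * number of interrupt sources (the variable names are illustrative,
 * not part of the kernel API):
 *
 *      tima = mmap(NULL, 4 * psize, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                  fd, KVM_XIVE_TIMA_PAGE_OFFSET * psize);
 *      esb  = mmap(NULL, 2 * nr_irqs * psize, PROT_READ | PROT_WRITE,
 *                  MAP_SHARED, fd, KVM_XIVE_ESB_PAGE_OFFSET * psize);
 */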

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
                                         u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u64 val;
        u16 idx;
        int rc;

        pr_devel("%s irq=0x%lx\n", __func__, irq);

        if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
                return -E2BIG;

        sb = kvmppc_xive_find_source(xive, irq, &idx);
        if (!sb) {
                pr_debug("No source, creating source block...\n");
                sb = kvmppc_xive_create_src_block(xive, irq);
                if (!sb) {
                        pr_err("Failed to create block...\n");
                        return -ENOMEM;
                }
        }
        state = &sb->irq_state[idx];

        if (get_user(val, ubufp)) {
                pr_err("fault getting user info !\n");
                return -EFAULT;
        }

        arch_spin_lock(&sb->lock);

        /*
         * If the source doesn't already have an IPI, allocate
         * one and get the corresponding data
         */
        if (!state->ipi_number) {
                state->ipi_number = xive_native_alloc_irq();
                if (state->ipi_number == 0) {
                        pr_err("Failed to allocate IRQ !\n");
                        rc = -ENXIO;
                        goto unlock;
                }
                xive_native_populate_irq_data(state->ipi_number,
                                              &state->ipi_data);
                pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
                         state->ipi_number, irq);
        }

        /* Restore LSI state */
        if (val & KVM_XIVE_LEVEL_SENSITIVE) {
                state->lsi = true;
                if (val & KVM_XIVE_LEVEL_ASSERTED)
                        state->asserted = true;
                pr_devel("  LSI ! Asserted=%d\n", state->asserted);
        }

        /* Mask IRQ to start with */
        state->act_server = 0;
        state->act_priority = MASKED;
        xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
        xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

        /* Increment the number of valid sources and mark this one valid */
        if (!state->valid)
                xive->src_count++;
        state->valid = true;

        rc = 0;

unlock:
        arch_spin_unlock(&sb->lock);

        return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
                                        struct kvmppc_xive_src_block *sb,
                                        struct kvmppc_xive_irq_state *state,
                                        u32 server, u8 priority, bool masked,
                                        u32 eisn)
{
        struct kvm *kvm = xive->kvm;
        u32 hw_num;
        int rc = 0;

        arch_spin_lock(&sb->lock);

        if (state->act_server == server && state->act_priority == priority &&
            state->eisn == eisn)
                goto unlock;

        pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
                 priority, server, masked, state->act_server,
                 state->act_priority);

        kvmppc_xive_select_irq(state, &hw_num, NULL);

        if (priority != MASKED && !masked) {
                rc = kvmppc_xive_select_target(kvm, &server, priority);
                if (rc)
                        goto unlock;

                state->act_priority = priority;
                state->act_server = server;
                state->eisn = eisn;

                rc = xive_native_configure_irq(hw_num,
                                               kvmppc_xive_vp(xive, server),
                                               priority, eisn);
        } else {
                state->act_priority = MASKED;
                state->act_server = 0;
                state->eisn = 0;

                rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
        }

unlock:
        arch_spin_unlock(&sb->lock);
        return rc;
}

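/*
 * Handler for the KVM_DEV_XIVE_GRP_SOURCE_CONFIG attribute group. The
 * 64-bit configuration word read from userspace packs the target
 * server, the priority, the masked state and the EISN, per the
 * KVM_XIVE_SOURCE_* masks and shifts defined in the uapi header.
 */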
static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
                                                long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u16 src;
        u64 kvm_cfg;
        u32 server;
        u8 priority;
        bool masked;
        u32 eisn;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        if (!state->valid)
                return -EINVAL;

        if (get_user(kvm_cfg, ubufp))
                return -EFAULT;

        pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

        priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
                KVM_XIVE_SOURCE_PRIORITY_SHIFT;
        server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
                KVM_XIVE_SOURCE_SERVER_SHIFT;
        masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
                KVM_XIVE_SOURCE_MASKED_SHIFT;
        eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
                KVM_XIVE_SOURCE_EISN_SHIFT;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }

        return kvmppc_xive_native_update_source_config(xive, sb, state, server,
                                                       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
                                          long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        int rc = 0;

        pr_devel("%s irq=0x%lx\n", __func__, irq);

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        rc = -EINVAL;

        arch_spin_lock(&sb->lock);

        if (state->valid) {
                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                rc = 0;
        }

        arch_spin_unlock(&sb->lock);
        return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
        /*
         * We only support 64K pages for the moment. This is also
         * advertised in the DT property "ibm,xive-eq-sizes"
         */
        switch (qshift) {
        case 0: /* EQ reset */
        case 16:
                return 0;
        case 12:
        case 21:
        case 24:
        default:
                return -EINVAL;
        }
}

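/*
 * Handler for the set side of the KVM_DEV_XIVE_GRP_EQ_CONFIG attribute
 * group. The EQ identifier encodes the server and priority, and the
 * kvm_ppc_xive_eq structure read from userspace carries the guest
 * queue page address, its shift, the toggle bit and the index. A
 * qshift of 0 resets the queue and disables queueing on that priority.
 */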
static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        void __user *ubufp = (void __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        int rc;
        __be32 *qaddr = NULL;
        struct page *page;
        struct xive_q *q;
        gfn_t gfn;
        unsigned long page_size;
        int srcu_idx;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
                return -EFAULT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("Trying to restore invalid queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        /* reset queue and disable queueing */
        if (!kvm_eq.qshift) {
                q->guest_qaddr  = 0;
                q->guest_qshift = 0;

                rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
                                                        NULL, 0, true);
                if (rc) {
                        pr_err("Failed to reset queue %d for VCPU %d: %d\n",
                               priority, xc->server_num, rc);
                        return rc;
                }

                return 0;
        }

        /*
         * sPAPR specifies an "Unconditional Notify" (n) flag for the
         * H_INT_SET_QUEUE_CONFIG hcall which forces notification
         * without using the coalescing mechanisms provided by the
         * XIVE END ESBs. This is required on KVM as notification
         * using the END ESBs is not supported.
         */
        if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
                pr_err("invalid flags %d\n", kvm_eq.flags);
                return -EINVAL;
        }

        rc = xive_native_validate_queue_size(kvm_eq.qshift);
        if (rc) {
                pr_err("invalid queue size %d\n", kvm_eq.qshift);
                return rc;
        }

        if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
                pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
                       1ull << kvm_eq.qshift);
                return -EINVAL;
        }

        srcu_idx = srcu_read_lock(&kvm->srcu);
        gfn = gpa_to_gfn(kvm_eq.qaddr);

        page_size = kvm_host_page_size(vcpu, gfn);
        if (1ull << kvm_eq.qshift > page_size) {
                srcu_read_unlock(&kvm->srcu, srcu_idx);
                pr_warn("Incompatible host page size %lx!\n", page_size);
                return -EINVAL;
        }

        page = gfn_to_page(kvm, gfn);
        if (is_error_page(page)) {
                srcu_read_unlock(&kvm->srcu, srcu_idx);
                pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
                return -EINVAL;
        }

        qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
        srcu_read_unlock(&kvm->srcu, srcu_idx);

        /*
         * Back up the queue page guest address so that the EQ page
         * can be marked dirty for migration.
         */
        q->guest_qaddr  = kvm_eq.qaddr;
        q->guest_qshift = kvm_eq.qshift;

        /*
         * Unconditional Notification is forced by default at the
         * OPAL level because the use of END ESBs is not supported by
         * Linux.
         */
        rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
                                        (__be32 *) qaddr, kvm_eq.qshift, true);
        if (rc) {
                pr_err("Failed to configure queue %d for VCPU %d: %d\n",
                       priority, xc->server_num, rc);
                put_page(page);
                return rc;
        }

        /*
         * Only restore the queue state when needed. When doing the
         * H_INT_SET_SOURCE_CONFIG hcall, it should not.
         */
        if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
                rc = xive_native_set_queue_state(xc->vp_id, priority,
                                                 kvm_eq.qtoggle,
                                                 kvm_eq.qindex);
                if (rc)
                        goto error;
        }

        rc = kvmppc_xive_attach_escalation(vcpu, priority,
                                           xive->single_escalation);
error:
        if (rc)
                kvmppc_xive_native_cleanup_queue(vcpu, priority);
        return rc;
}

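/*
 * Handler for the get side of the KVM_DEV_XIVE_GRP_EQ_CONFIG attribute
 * group: returns the current EQ configuration along with the live
 * toggle bit and index retrieved from OPAL, so that userspace can
 * transfer the queue state across a migration.
 */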
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        struct xive_q *q;
        void __user *ubufp = (void __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        u64 qaddr;
        u64 qshift;
        u64 qeoi_page;
        u32 escalate_irq;
        u64 qflags;
        int rc;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        memset(&kvm_eq, 0, sizeof(kvm_eq));

        if (!q->qpage)
                return 0;

        rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
                                        &qeoi_page, &escalate_irq, &qflags);
        if (rc)
                return rc;

        kvm_eq.flags = 0;
        if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
                kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

        kvm_eq.qshift = q->guest_qshift;
        kvm_eq.qaddr  = q->guest_qaddr;

        rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
                                         &kvm_eq.qindex);
        if (rc)
                return rc;

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
                return -EFAULT;

        return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
        int i;

        for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

                if (!state->valid)
                        continue;

                if (state->act_priority == MASKED)
                        continue;

                state->eisn = 0;
                state->act_server = 0;
                state->act_priority = MASKED;
                xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
                xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
                if (state->pt_number) {
                        xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
                        xive_native_configure_irq(state->pt_number,
                                                  0, MASKED, 0);
                }
        }
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        pr_devel("%s\n", __func__);

        mutex_lock(&xive->lock);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
                unsigned int prio;

                if (!xc)
                        continue;

                kvmppc_xive_disable_vcpu_interrupts(vcpu);

                for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

                        /* Single escalation, no queue 7 */
                        if (prio == 7 && xive->single_escalation)
                                break;

                        if (xc->esc_virq[prio]) {
                                free_irq(xc->esc_virq[prio], vcpu);
                                irq_dispose_mapping(xc->esc_virq[prio]);
                                kfree(xc->esc_virq_names[prio]);
                                xc->esc_virq[prio] = 0;
                        }

                        kvmppc_xive_native_cleanup_queue(vcpu, prio);
                }
        }

        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_reset_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        mutex_unlock(&xive->lock);

        return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
        int j;

        for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
                struct xive_irq_data *xd;
                u32 hw_num;

                if (!state->valid)
                        continue;

                /*
                 * The struct kvmppc_xive_irq_state reflects the state
                 * of the EAS configuration and not the state of the
                 * source. The source is masked by setting the PQ bits
                 * to '-Q', which is what is done before calling the
                 * KVM_DEV_XIVE_EQ_SYNC control.
                 *
                 * If a source EAS is configured, OPAL syncs the XIVE
                 * IC of the source and the XIVE IC of the previous
                 * target if any.
                 *
                 * So it should be fine ignoring MASKED sources as
                 * they have been synced already.
                 */
                if (state->act_priority == MASKED)
                        continue;

                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                xive_native_sync_queue(hw_num);
        }
}

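/*
 * Mark each in-use EQ page of a vCPU dirty, using the guest physical
 * address saved in guest_qaddr, so that a migration captures the
 * queue contents.
 */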
static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        unsigned int prio;
        int srcu_idx;

        if (!xc)
                return -ENOENT;

        for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
                struct xive_q *q = &xc->queues[prio];

                if (!q->qpage)
                        continue;

                /* Mark EQ page dirty for migration */
                srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
                srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
        }
        return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        pr_devel("%s\n", __func__);

        mutex_lock(&xive->lock);
        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_native_sync_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvmppc_xive_native_vcpu_eq_sync(vcpu);
        }
        mutex_unlock(&xive->lock);

        return 0;
}

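/*
 * Dispatch for KVM_SET_DEVICE_ATTR on the XIVE native device. A
 * minimal userspace sketch (names other than the uapi constants are
 * illustrative, not part of the kernel API):
 *
 *      struct kvm_device_attr attr = {
 *              .group = KVM_DEV_XIVE_GRP_SOURCE_CONFIG,
 *              .attr  = girq,                        // guest IRQ number
 *              .addr  = (uint64_t)&source_config,    // 64-bit config word
 *      };
 *      ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
 */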
static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                        return kvmppc_xive_reset(xive);
                case KVM_DEV_XIVE_EQ_SYNC:
                        return kvmppc_xive_native_eq_sync(xive);
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
                return kvmppc_xive_native_set_source(xive, attr->attr,
                                                     attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
                return kvmppc_xive_native_set_source_config(xive, attr->attr,
                                                            attr->addr);
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_set_queue_config(xive, attr->attr,
                                                           attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                return kvmppc_xive_native_sync_source(xive, attr->attr,
                                                      attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_get_queue_config(xive, attr->attr,
                                                           attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                case KVM_DEV_XIVE_EQ_SYNC:
                        return 0;
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
                    attr->attr < KVMPPC_XIVE_NR_IRQS)
                        return 0;
                break;
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return 0;
        }
        return -ENXIO;
}

/*
 * Called when device fd is closed.  kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        int i;

        pr_devel("Releasing xive native device\n");

        /*
         * Clear the KVM device file address_space which is used to
         * unmap the ESB pages when a device is passed-through.
         */
        mutex_lock(&xive->mapping_lock);
        xive->mapping = NULL;
        mutex_unlock(&xive->mapping_lock);

        /*
         * Since this is the device release function, we know that
         * userspace does not have any open fd or mmap referring to
         * the device.  Therefore there can not be any of the
         * device attribute set/get, mmap, or page fault functions
         * being executed concurrently, and similarly, the
         * connect_vcpu and set/clr_mapped functions cannot be in
         * progress either.
         */

        debugfs_remove(xive->dentry);

        /*
         * We should clean up the vCPU interrupt presenters first.
         */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                /*
                 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
                 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
                 * Holding the vcpu->mutex also means that the vcpu cannot
                 * be executing the KVM_RUN ioctl, and therefore it cannot
                 * be executing the XIVE push or pull code or accessing
                 * the XIVE MMIO regions.
                 */
                mutex_lock(&vcpu->mutex);
                kvmppc_xive_native_cleanup_vcpu(vcpu);
                mutex_unlock(&vcpu->mutex);
        }

        /*
         * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
         * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
         * against xive code getting called during vcpu execution or
         * set/get one_reg operations.
         */
        kvm->arch.xive = NULL;

        for (i = 0; i <= xive->max_sbid; i++) {
                if (xive->src_blocks[i])
                        kvmppc_xive_free_sources(xive->src_blocks[i]);
                kfree(xive->src_blocks[i]);
                xive->src_blocks[i] = NULL;
        }

        if (xive->vp_base != XIVE_INVALID_VP)
                xive_native_free_vp_block(xive->vp_base);

        /*
         * The kvmppc_xive structure is kept under the xive_devices
         * struct of the machine for reuse. For now, it is freed when
         * the VM is destroyed, until we fix all the execution paths.
         */

        kfree(dev);
}

/*
 * Create a XIVE device.  kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
        struct kvmppc_xive *xive;
        struct kvm *kvm = dev->kvm;
        int ret = 0;

        pr_devel("Creating xive native device\n");

        if (kvm->arch.xive)
                return -EEXIST;

        xive = kvmppc_xive_get_device(kvm, type);
        if (!xive)
                return -ENOMEM;

        dev->private = xive;
        xive->dev = dev;
        xive->kvm = kvm;
        mutex_init(&xive->mapping_lock);
        mutex_init(&xive->lock);

        /*
         * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large default;
         * getting the maximum number of CPUs the VM was configured
         * with would improve our usage of the XIVE VP space.
         */
        xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
        pr_devel("VP_Base=%x\n", xive->vp_base);

        if (xive->vp_base == XIVE_INVALID_VP)
                ret = -ENXIO;

        xive->single_escalation = xive_native_has_single_escalation();
        xive->ops = &kvmppc_xive_native_ops;

        if (ret)
                return ret;

        kvm->arch.xive = xive;
        return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)

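/*
 * Capture the vCPU interrupt presenter state for the KVM_REG_PPC_VP_STATE
 * one_reg: the OS view of the thread context (word w01) saved on guest
 * exit, merged with the IPB value backed up by OPAL in the NVT
 * structure.
 */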
int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u64 opal_state;
        int rc;

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc)
                return -ENOENT;

        /* Thread context registers. We only care about IPB and CPPR */
        val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

        /* Get the VP state from OPAL */
        rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
        if (rc)
                return rc;

        /*
         * Capture the backup of the IPB register in the NVT structure
         * and merge it in our KVM VP state.
         */
        val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

        pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
                 __func__,
                 vcpu->arch.xive_saved_state.nsr,
                 vcpu->arch.xive_saved_state.cppr,
                 vcpu->arch.xive_saved_state.ipb,
                 vcpu->arch.xive_saved_state.pipr,
                 vcpu->arch.xive_saved_state.w01,
                 (u32) vcpu->arch.xive_cam_word, opal_state);

        return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

        pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
                 val->xive_timaval[0], val->xive_timaval[1]);

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc || !xive)
                return -ENOENT;

        /* We can't update the state of a "pushed" VCPU */
        if (WARN_ON(vcpu->arch.xive_pushed))
                return -EBUSY;

        /*
         * Restore the thread context registers. IPB and CPPR should
         * be the only ones that matter.
         */
        vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

        /*
         * There is no need to restore the XIVE internal state (IPB
         * stored in the NVT) as the IPB register was merged in the
         * KVM VP state when captured.
         */
        return 0;
}

bool kvmppc_xive_native_supported(void)
{
        return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
        struct kvmppc_xive *xive = m->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        if (!kvm)
                return 0;

        seq_puts(m, "=========\nVCPU state\n=========\n");

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

                if (!xc)
                        continue;

                seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
                           xc->server_num,
                           vcpu->arch.xive_saved_state.nsr,
                           vcpu->arch.xive_saved_state.cppr,
                           vcpu->arch.xive_saved_state.ipb,
                           vcpu->arch.xive_saved_state.pipr,
                           vcpu->arch.xive_saved_state.w01,
                           (u32) vcpu->arch.xive_cam_word);

                kvmppc_xive_debug_show_queues(m, vcpu);
        }

        return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
        return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
        .open = xive_native_debug_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
        char *name;

        name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
        if (!name) {
                pr_err("%s: no memory for name\n", __func__);
                return;
        }

        xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
                                           xive, &xive_native_debug_fops);

        pr_debug("%s: created %s\n", __func__, name);
        kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

        /* Register some debug interfaces */
        xive_native_debugfs_init(xive);
}

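/*
 * Registered as the KVM_DEV_TYPE_XIVE device class from the book3s
 * module init code when kvmppc_xive_native_supported() reports that
 * OPAL can save and restore the XIVE queue state.
 */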
struct kvm_device_ops kvm_xive_native_ops = {
        .name = "kvm-xive-native",
        .create = kvmppc_xive_native_create,
        .init = kvmppc_xive_native_init,
        .release = kvmppc_xive_native_release,
        .set_attr = kvmppc_xive_native_set_attr,
        .get_attr = kvmppc_xive_native_get_attr,
        .has_attr = kvmppc_xive_native_has_attr,
        .mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
        ;
}

void kvmppc_xive_native_exit_module(void)
{
        ;
}