1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xen event channels
4  *
5  * Xen models interrupts with abstract event channels.  Because each
6  * domain gets 1024 event channels, but NR_IRQS is not that large, we
7  * must dynamically map irqs<->event channels.  The event channels
8  * interface with the rest of the kernel by defining a xen interrupt
9  * chip.  When an event is received, it is mapped to an irq and sent
10  * through the normal interrupt processing path.
11  *
12  * There are four kinds of events which can be mapped to an event
13  * channel:
14  *
15  * 1. Inter-domain notifications.  This includes all the virtual
16  *    device events, since they're driven by front-ends in another domain
17  *    (typically dom0).
18  * 2. VIRQs, typically used for timers.  These are per-cpu events.
19  * 3. IPIs.
20  * 4. PIRQs - Hardware interrupts.
21  *
22  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
23  */
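/*
 * Illustrative sketch (handler and device names are hypothetical): a typical
 * frontend driver binds an event channel port it got from its backend to a
 * Linux irq plus handler with the helpers exported by this file, and tears
 * the binding down again on disconnect.
 *
 *	static irqreturn_t my_frontend_handler(int irq, void *dev_id)
 *	{
 *		// process the notification for dev_id here
 *		return IRQ_HANDLED;
 *	}
 *
 *	irq = bind_evtchn_to_irqhandler(evtchn, my_frontend_handler, 0,
 *					"my-frontend", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	notify_remote_via_irq(irq);		// kick the other end
 *	...
 *	unbind_from_irqhandler(irq, my_dev);	// frees the irq, closes the port
 */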
24
25 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
26
27 #include <linux/linkage.h>
28 #include <linux/interrupt.h>
29 #include <linux/irq.h>
30 #include <linux/moduleparam.h>
31 #include <linux/string.h>
32 #include <linux/memblock.h>
33 #include <linux/slab.h>
34 #include <linux/irqnr.h>
35 #include <linux/pci.h>
36 #include <linux/rcupdate.h>
37 #include <linux/spinlock.h>
38 #include <linux/cpuhotplug.h>
39 #include <linux/atomic.h>
40 #include <linux/ktime.h>
41
42 #ifdef CONFIG_X86
43 #include <asm/desc.h>
44 #include <asm/ptrace.h>
45 #include <asm/idtentry.h>
46 #include <asm/irq.h>
47 #include <asm/io_apic.h>
48 #include <asm/i8259.h>
49 #include <asm/xen/cpuid.h>
50 #include <asm/xen/pci.h>
51 #endif
52 #include <asm/sync_bitops.h>
53 #include <asm/xen/hypercall.h>
54 #include <asm/xen/hypervisor.h>
55 #include <xen/page.h>
56
57 #include <xen/xen.h>
58 #include <xen/hvm.h>
59 #include <xen/xen-ops.h>
60 #include <xen/events.h>
61 #include <xen/interface/xen.h>
62 #include <xen/interface/event_channel.h>
63 #include <xen/interface/hvm/hvm_op.h>
64 #include <xen/interface/hvm/params.h>
65 #include <xen/interface/physdev.h>
66 #include <xen/interface/sched.h>
67 #include <xen/interface/vcpu.h>
68 #include <xen/xenbus.h>
69 #include <asm/hw_irq.h>
70
71 #include "events_internal.h"
72
73 #undef MODULE_PARAM_PREFIX
74 #define MODULE_PARAM_PREFIX "xen."
75
76 /* Interrupt types. */
77 enum xen_irq_type {
78         IRQT_UNBOUND = 0,
79         IRQT_PIRQ,
80         IRQT_VIRQ,
81         IRQT_IPI,
82         IRQT_EVTCHN
83 };
84
85 /*
86  * Packed IRQ information:
87  * type - enum xen_irq_type
88  * event channel - irq->event channel mapping
89  * cpu - cpu this event channel is bound to
90  * index - type-specific information:
91  *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
92  *           guest, or GSI (real passthrough IRQ) of the device.
93  *    VIRQ - virq number
94  *    IPI - IPI vector
95  *    EVTCHN - optional xenbus device of an interdomain event channel
96  */
97 struct irq_info {
98         struct list_head list;
99         struct list_head eoi_list;
100         struct rcu_work rwork;
101         short refcnt;
102         u8 spurious_cnt;
103         u8 is_accounted;
104         short type;             /* type: IRQT_* */
105         u8 mask_reason;         /* Why is event channel masked */
106 #define EVT_MASK_REASON_EXPLICIT        0x01
107 #define EVT_MASK_REASON_TEMPORARY       0x02
108 #define EVT_MASK_REASON_EOI_PENDING     0x04
109         u8 is_active;           /* Is event just being handled? */
110         unsigned irq;
111         evtchn_port_t evtchn;   /* event channel */
112         unsigned short cpu;     /* cpu bound */
113         unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
114         unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
115         u64 eoi_time;           /* Time in jiffies when to EOI. */
116         raw_spinlock_t lock;
117
118         union {
119                 unsigned short virq;
120                 enum ipi_vector ipi;
121                 struct {
122                         unsigned short pirq;
123                         unsigned short gsi;
124                         unsigned char vector;
125                         unsigned char flags;
126                         uint16_t domid;
127                 } pirq;
128                 struct xenbus_device *interdomain;
129         } u;
130 };
131
132 #define PIRQ_NEEDS_EOI  (1 << 0)
133 #define PIRQ_SHAREABLE  (1 << 1)
134 #define PIRQ_MSI_GROUP  (1 << 2)
135
136 static uint __read_mostly event_loop_timeout = 2;
137 module_param(event_loop_timeout, uint, 0644);
138
139 static uint __read_mostly event_eoi_delay = 10;
140 module_param(event_eoi_delay, uint, 0644);
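/*
 * Tuning sketch (the values below are just examples): because of the "xen."
 * MODULE_PARAM_PREFIX above, both knobs can be set on the kernel command
 * line, e.g.
 *
 *	xen.event_loop_timeout=4 xen.event_eoi_delay=20
 *
 * and, being registered with mode 0644, they should also be writable at
 * runtime, presumably via /sys/module/xen/parameters/.
 */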
141
142 const struct evtchn_ops *evtchn_ops;
143
144 /*
145  * This lock protects updates to the following mapping and reference-count
146  * arrays. The lock does not need to be acquired to read the mapping tables.
147  */
148 static DEFINE_MUTEX(irq_mapping_update_lock);
149
150 /*
151  * Lock hierarchy:
152  *
153  * irq_mapping_update_lock
154  *   IRQ-desc lock
155  *     percpu eoi_list_lock
156  *       irq_info->lock
157  */
158
159 static LIST_HEAD(xen_irq_list_head);
160
161 /* IRQ <-> VIRQ mapping. */
162 static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
163
164 /* IRQ <-> IPI mapping */
165 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
166
167 /* Event channel distribution data */
168 static atomic_t channels_on_cpu[NR_CPUS];
169
170 static int **evtchn_to_irq;
171 #ifdef CONFIG_X86
172 static unsigned long *pirq_eoi_map;
173 #endif
174 static bool (*pirq_needs_eoi)(unsigned irq);
175
176 #define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
177 #define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
178 #define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
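/*
 * Worked example, assuming 4 KiB pages and a 4-byte int: EVTCHN_PER_ROW is
 * 4096 / 4 = 1024, so event channel port 1500 is found in row 1500 / 1024 = 1,
 * column 1500 % 1024 = 476 of the two-level evtchn_to_irq table.
 */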
179
180 /* Xen will never allocate port zero for any purpose. */
181 #define VALID_EVTCHN(chn)       ((chn) != 0)
182
183 static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
184
185 static struct irq_chip xen_dynamic_chip;
186 static struct irq_chip xen_lateeoi_chip;
187 static struct irq_chip xen_percpu_chip;
188 static struct irq_chip xen_pirq_chip;
189 static void enable_dynirq(struct irq_data *data);
190 static void disable_dynirq(struct irq_data *data);
191
192 static DEFINE_PER_CPU(unsigned int, irq_epoch);
193
194 static void clear_evtchn_to_irq_row(int *evtchn_row)
195 {
196         unsigned col;
197
198         for (col = 0; col < EVTCHN_PER_ROW; col++)
199                 WRITE_ONCE(evtchn_row[col], -1);
200 }
201
202 static void clear_evtchn_to_irq_all(void)
203 {
204         unsigned row;
205
206         for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
207                 if (evtchn_to_irq[row] == NULL)
208                         continue;
209                 clear_evtchn_to_irq_row(evtchn_to_irq[row]);
210         }
211 }
212
213 static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
214 {
215         unsigned row;
216         unsigned col;
217         int *evtchn_row;
218
219         if (evtchn >= xen_evtchn_max_channels())
220                 return -EINVAL;
221
222         row = EVTCHN_ROW(evtchn);
223         col = EVTCHN_COL(evtchn);
224
225         if (evtchn_to_irq[row] == NULL) {
226                 /* Unallocated irq entries return -1 anyway */
227                 if (irq == -1)
228                         return 0;
229
230                 evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
231                 if (evtchn_row == NULL)
232                         return -ENOMEM;
233
234                 clear_evtchn_to_irq_row(evtchn_row);
235
236                 /*
237                  * We've prepared an empty row for the mapping. If a different
238                  * thread was faster inserting it, we can drop ours.
239                  */
240                 if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
241                         free_page((unsigned long) evtchn_row);
242         }
243
244         WRITE_ONCE(evtchn_to_irq[row][col], irq);
245         return 0;
246 }
247
248 int get_evtchn_to_irq(evtchn_port_t evtchn)
249 {
250         if (evtchn >= xen_evtchn_max_channels())
251                 return -1;
252         if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
253                 return -1;
254         return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
255 }
256
257 /* Get info for IRQ */
258 static struct irq_info *info_for_irq(unsigned irq)
259 {
260         if (irq < nr_legacy_irqs())
261                 return legacy_info_ptrs[irq];
262         else
263                 return irq_get_chip_data(irq);
264 }
265
266 static void set_info_for_irq(unsigned int irq, struct irq_info *info)
267 {
268         if (irq < nr_legacy_irqs())
269                 legacy_info_ptrs[irq] = info;
270         else
271                 irq_set_chip_data(irq, info);
272 }
273
274 /* Per CPU channel accounting */
275 static void channels_on_cpu_dec(struct irq_info *info)
276 {
277         if (!info->is_accounted)
278                 return;
279
280         info->is_accounted = 0;
281
282         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
283                 return;
284
285         WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1, 0));
286 }
287
288 static void channels_on_cpu_inc(struct irq_info *info)
289 {
290         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
291                 return;
292
293         if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
294                                             INT_MAX)))
295                 return;
296
297         info->is_accounted = 1;
298 }
299
300 static void delayed_free_irq(struct work_struct *work)
301 {
302         struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
303                                              rwork);
304         unsigned int irq = info->irq;
305
306         /* Remove the info pointer only now, with no potential users left. */
307         set_info_for_irq(irq, NULL);
308
309         kfree(info);
310
311         /* Legacy IRQ descriptors are managed by the arch. */
312         if (irq >= nr_legacy_irqs())
313                 irq_free_desc(irq);
314 }
315
316 /* Constructors for packed IRQ information. */
317 static int xen_irq_info_common_setup(struct irq_info *info,
318                                      unsigned irq,
319                                      enum xen_irq_type type,
320                                      evtchn_port_t evtchn,
321                                      unsigned short cpu)
322 {
323         int ret;
324
325         BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
326
327         info->type = type;
328         info->irq = irq;
329         info->evtchn = evtchn;
330         info->cpu = cpu;
331         info->mask_reason = EVT_MASK_REASON_EXPLICIT;
332         raw_spin_lock_init(&info->lock);
333
334         ret = set_evtchn_to_irq(evtchn, irq);
335         if (ret < 0)
336                 return ret;
337
338         irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
339
340         return xen_evtchn_port_setup(evtchn);
341 }
342
343 static int xen_irq_info_evtchn_setup(unsigned irq,
344                                      evtchn_port_t evtchn,
345                                      struct xenbus_device *dev)
346 {
347         struct irq_info *info = info_for_irq(irq);
348         int ret;
349
350         ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
351         info->u.interdomain = dev;
352         if (dev)
353                 atomic_inc(&dev->event_channels);
354
355         return ret;
356 }
357
358 static int xen_irq_info_ipi_setup(unsigned cpu,
359                                   unsigned irq,
360                                   evtchn_port_t evtchn,
361                                   enum ipi_vector ipi)
362 {
363         struct irq_info *info = info_for_irq(irq);
364
365         info->u.ipi = ipi;
366
367         per_cpu(ipi_to_irq, cpu)[ipi] = irq;
368
369         return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
370 }
371
372 static int xen_irq_info_virq_setup(unsigned cpu,
373                                    unsigned irq,
374                                    evtchn_port_t evtchn,
375                                    unsigned virq)
376 {
377         struct irq_info *info = info_for_irq(irq);
378
379         info->u.virq = virq;
380
381         per_cpu(virq_to_irq, cpu)[virq] = irq;
382
383         return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
384 }
385
386 static int xen_irq_info_pirq_setup(unsigned irq,
387                                    evtchn_port_t evtchn,
388                                    unsigned pirq,
389                                    unsigned gsi,
390                                    uint16_t domid,
391                                    unsigned char flags)
392 {
393         struct irq_info *info = info_for_irq(irq);
394
395         info->u.pirq.pirq = pirq;
396         info->u.pirq.gsi = gsi;
397         info->u.pirq.domid = domid;
398         info->u.pirq.flags = flags;
399
400         return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
401 }
402
403 static void xen_irq_info_cleanup(struct irq_info *info)
404 {
405         set_evtchn_to_irq(info->evtchn, -1);
406         xen_evtchn_port_remove(info->evtchn, info->cpu);
407         info->evtchn = 0;
408         channels_on_cpu_dec(info);
409 }
410
411 /*
412  * Accessors for packed IRQ information.
413  */
414 evtchn_port_t evtchn_from_irq(unsigned irq)
415 {
416         const struct irq_info *info = NULL;
417
418         if (likely(irq < nr_irqs))
419                 info = info_for_irq(irq);
420         if (!info)
421                 return 0;
422
423         return info->evtchn;
424 }
425
426 unsigned int irq_from_evtchn(evtchn_port_t evtchn)
427 {
428         return get_evtchn_to_irq(evtchn);
429 }
430 EXPORT_SYMBOL_GPL(irq_from_evtchn);
431
432 int irq_from_virq(unsigned int cpu, unsigned int virq)
433 {
434         return per_cpu(virq_to_irq, cpu)[virq];
435 }
436
437 static enum ipi_vector ipi_from_irq(unsigned irq)
438 {
439         struct irq_info *info = info_for_irq(irq);
440
441         BUG_ON(info == NULL);
442         BUG_ON(info->type != IRQT_IPI);
443
444         return info->u.ipi;
445 }
446
447 static unsigned virq_from_irq(unsigned irq)
448 {
449         struct irq_info *info = info_for_irq(irq);
450
451         BUG_ON(info == NULL);
452         BUG_ON(info->type != IRQT_VIRQ);
453
454         return info->u.virq;
455 }
456
457 static unsigned pirq_from_irq(unsigned irq)
458 {
459         struct irq_info *info = info_for_irq(irq);
460
461         BUG_ON(info == NULL);
462         BUG_ON(info->type != IRQT_PIRQ);
463
464         return info->u.pirq.pirq;
465 }
466
467 static enum xen_irq_type type_from_irq(unsigned irq)
468 {
469         return info_for_irq(irq)->type;
470 }
471
472 static unsigned cpu_from_irq(unsigned irq)
473 {
474         return info_for_irq(irq)->cpu;
475 }
476
477 unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
478 {
479         int irq = get_evtchn_to_irq(evtchn);
480         unsigned ret = 0;
481
482         if (irq != -1)
483                 ret = cpu_from_irq(irq);
484
485         return ret;
486 }
487
488 static void do_mask(struct irq_info *info, u8 reason)
489 {
490         unsigned long flags;
491
492         raw_spin_lock_irqsave(&info->lock, flags);
493
494         if (!info->mask_reason)
495                 mask_evtchn(info->evtchn);
496
497         info->mask_reason |= reason;
498
499         raw_spin_unlock_irqrestore(&info->lock, flags);
500 }
501
502 static void do_unmask(struct irq_info *info, u8 reason)
503 {
504         unsigned long flags;
505
506         raw_spin_lock_irqsave(&info->lock, flags);
507
508         info->mask_reason &= ~reason;
509
510         if (!info->mask_reason)
511                 unmask_evtchn(info->evtchn);
512
513         raw_spin_unlock_irqrestore(&info->lock, flags);
514 }
515
516 #ifdef CONFIG_X86
517 static bool pirq_check_eoi_map(unsigned irq)
518 {
519         return test_bit(pirq_from_irq(irq), pirq_eoi_map);
520 }
521 #endif
522
523 static bool pirq_needs_eoi_flag(unsigned irq)
524 {
525         struct irq_info *info = info_for_irq(irq);
526         BUG_ON(info->type != IRQT_PIRQ);
527
528         return info->u.pirq.flags & PIRQ_NEEDS_EOI;
529 }
530
531 static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
532                                bool force_affinity)
533 {
534         int irq = get_evtchn_to_irq(evtchn);
535         struct irq_info *info = info_for_irq(irq);
536
537         BUG_ON(irq == -1);
538
539         if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
540                 struct irq_data *data = irq_get_irq_data(irq);
541
542                 irq_data_update_affinity(data, cpumask_of(cpu));
543                 irq_data_update_effective_affinity(data, cpumask_of(cpu));
544         }
545
546         xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
547
548         channels_on_cpu_dec(info);
549         info->cpu = cpu;
550         channels_on_cpu_inc(info);
551 }
552
553 /**
554  * notify_remote_via_irq - send event to remote end of event channel via irq
555  * @irq: irq of event channel to send event to
556  *
557  * Unlike notify_remote_via_evtchn(), this is safe to use across
558  * save/restore. Notifications on a broken connection are silently
559  * dropped.
560  */
561 void notify_remote_via_irq(int irq)
562 {
563         evtchn_port_t evtchn = evtchn_from_irq(irq);
564
565         if (VALID_EVTCHN(evtchn))
566                 notify_remote_via_evtchn(evtchn);
567 }
568 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
569
570 struct lateeoi_work {
571         struct delayed_work delayed;
572         spinlock_t eoi_list_lock;
573         struct list_head eoi_list;
574 };
575
576 static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
577
578 static void lateeoi_list_del(struct irq_info *info)
579 {
580         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
581         unsigned long flags;
582
583         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
584         list_del_init(&info->eoi_list);
585         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
586 }
587
588 static void lateeoi_list_add(struct irq_info *info)
589 {
590         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
591         struct irq_info *elem;
592         u64 now = get_jiffies_64();
593         unsigned long delay;
594         unsigned long flags;
595
596         if (now < info->eoi_time)
597                 delay = info->eoi_time - now;
598         else
599                 delay = 1;
600
601         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
602
603         elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
604                                         eoi_list);
605         if (!elem || info->eoi_time < elem->eoi_time) {
606                 list_add(&info->eoi_list, &eoi->eoi_list);
607                 mod_delayed_work_on(info->eoi_cpu, system_wq,
608                                     &eoi->delayed, delay);
609         } else {
610                 list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
611                         if (elem->eoi_time <= info->eoi_time)
612                                 break;
613                 }
614                 list_add(&info->eoi_list, &elem->eoi_list);
615         }
616
617         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
618 }
619
620 static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
621 {
622         evtchn_port_t evtchn;
623         unsigned int cpu;
624         unsigned int delay = 0;
625
626         evtchn = info->evtchn;
627         if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
628                 return;
629
630         if (spurious) {
631                 struct xenbus_device *dev = info->u.interdomain;
632                 unsigned int threshold = 1;
633
634                 if (dev && dev->spurious_threshold)
635                         threshold = dev->spurious_threshold;
636
637                 if ((1 << info->spurious_cnt) < (HZ << 2)) {
638                         if (info->spurious_cnt != 0xFF)
639                                 info->spurious_cnt++;
640                 }
641                 if (info->spurious_cnt > threshold) {
642                         delay = 1 << (info->spurious_cnt - 1 - threshold);
643                         if (delay > HZ)
644                                 delay = HZ;
645                         if (!info->eoi_time)
646                                 info->eoi_cpu = smp_processor_id();
647                         info->eoi_time = get_jiffies_64() + delay;
648                         if (dev)
649                                 atomic_add(delay, &dev->jiffies_eoi_delayed);
650                 }
651                 if (dev)
652                         atomic_inc(&dev->spurious_events);
653         } else {
654                 info->spurious_cnt = 0;
655         }
656
657         cpu = info->eoi_cpu;
658         if (info->eoi_time &&
659             (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
660                 lateeoi_list_add(info);
661                 return;
662         }
663
664         info->eoi_time = 0;
665
666         /* is_active hasn't been reset yet, do it now. */
667         smp_store_release(&info->is_active, 0);
668         do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
669 }
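/*
 * Worked example for the spurious-event backoff in xen_irq_lateeoi_locked()
 * above, assuming the default threshold of 1: the second spurious event in a
 * row delays the EOI by 1 << (2 - 1 - 1) = 1 jiffy, the third by 2 jiffies,
 * the fourth by 4, and so on, capped at HZ (one second).
 */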
670
671 static void xen_irq_lateeoi_worker(struct work_struct *work)
672 {
673         struct lateeoi_work *eoi;
674         struct irq_info *info;
675         u64 now = get_jiffies_64();
676         unsigned long flags;
677
678         eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
679
680         rcu_read_lock();
681
682         while (true) {
683                 spin_lock_irqsave(&eoi->eoi_list_lock, flags);
684
685                 info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
686                                                 eoi_list);
687
688                 if (info == NULL)
689                         break;
690
691                 if (now < info->eoi_time) {
692                         mod_delayed_work_on(info->eoi_cpu, system_wq,
693                                             &eoi->delayed,
694                                             info->eoi_time - now);
695                         break;
696                 }
697
698                 list_del_init(&info->eoi_list);
699
700                 spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
701
702                 info->eoi_time = 0;
703
704                 xen_irq_lateeoi_locked(info, false);
705         }
706
707         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
708
709         rcu_read_unlock();
710 }
711
712 static void xen_cpu_init_eoi(unsigned int cpu)
713 {
714         struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
715
716         INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
717         spin_lock_init(&eoi->eoi_list_lock);
718         INIT_LIST_HEAD(&eoi->eoi_list);
719 }
720
721 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
722 {
723         struct irq_info *info;
724
725         rcu_read_lock();
726
727         info = info_for_irq(irq);
728
729         if (info)
730                 xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
731
732         rcu_read_unlock();
733 }
734 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
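/*
 * Illustrative sketch of the lateeoi pattern (handler and helper names here
 * are hypothetical): a backend binds with the lateeoi variant and signals the
 * EOI itself once the event has really been processed, passing
 * XEN_EOI_FLAG_SPURIOUS when nothing was pending so that the delayed-EOI
 * throttling above can kick in.
 *
 *	static irqreturn_t my_backend_handler(int irq, void *dev_id)
 *	{
 *		bool did_work = my_backend_do_work(dev_id);
 *
 *		xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);
 *		return IRQ_HANDLED;
 *	}
 *
 *	irq = bind_interdomain_evtchn_to_irqhandler_lateeoi(dev, remote_port,
 *			my_backend_handler, 0, "my-backend", my_backend);
 */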
735
736 static void xen_irq_init(unsigned irq)
737 {
738         struct irq_info *info;
739
740         info = kzalloc(sizeof(*info), GFP_KERNEL);
741         if (info == NULL)
742                 panic("Unable to allocate metadata for IRQ%d\n", irq);
743
744         info->type = IRQT_UNBOUND;
745         info->refcnt = -1;
746         INIT_RCU_WORK(&info->rwork, delayed_free_irq);
747
748         set_info_for_irq(irq, info);
749         /*
750          * Interrupt affinity setting can be immediate. No point
751          * in delaying it until an interrupt is handled.
752          */
753         irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
754
755         INIT_LIST_HEAD(&info->eoi_list);
756         list_add_tail(&info->list, &xen_irq_list_head);
757 }
758
759 static int __must_check xen_allocate_irqs_dynamic(int nvec)
760 {
761         int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
762
763         if (irq >= 0) {
764                 for (i = 0; i < nvec; i++)
765                         xen_irq_init(irq + i);
766         }
767
768         return irq;
769 }
770
771 static inline int __must_check xen_allocate_irq_dynamic(void)
772 {
773
774         return xen_allocate_irqs_dynamic(1);
775 }
776
777 static int __must_check xen_allocate_irq_gsi(unsigned gsi)
778 {
779         int irq;
780
781         /*
782          * A PV guest has no concept of a GSI (since it has no ACPI
783          * nor access to/knowledge of the physical APICs). Therefore
784          * all IRQs are dynamically allocated from the entire IRQ
785          * space.
786          */
787         if (xen_pv_domain() && !xen_initial_domain())
788                 return xen_allocate_irq_dynamic();
789
790         /* Legacy IRQ descriptors are already allocated by the arch. */
791         if (gsi < nr_legacy_irqs())
792                 irq = gsi;
793         else
794                 irq = irq_alloc_desc_at(gsi, -1);
795
796         xen_irq_init(irq);
797
798         return irq;
799 }
800
801 static void xen_free_irq(unsigned irq)
802 {
803         struct irq_info *info = info_for_irq(irq);
804
805         if (WARN_ON(!info))
806                 return;
807
808         if (!list_empty(&info->eoi_list))
809                 lateeoi_list_del(info);
810
811         list_del(&info->list);
812
813         WARN_ON(info->refcnt > 0);
814
815         queue_rcu_work(system_wq, &info->rwork);
816 }
817
818 static void xen_evtchn_close(evtchn_port_t port)
819 {
820         struct evtchn_close close;
821
822         close.port = port;
823         if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
824                 BUG();
825 }
826
827 /* Not called for lateeoi events. */
828 static void event_handler_exit(struct irq_info *info)
829 {
830         smp_store_release(&info->is_active, 0);
831         clear_evtchn(info->evtchn);
832 }
833
834 static void pirq_query_unmask(int irq)
835 {
836         struct physdev_irq_status_query irq_status;
837         struct irq_info *info = info_for_irq(irq);
838
839         BUG_ON(info->type != IRQT_PIRQ);
840
841         irq_status.irq = pirq_from_irq(irq);
842         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
843                 irq_status.flags = 0;
844
845         info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
846         if (irq_status.flags & XENIRQSTAT_needs_eoi)
847                 info->u.pirq.flags |= PIRQ_NEEDS_EOI;
848 }
849
850 static void eoi_pirq(struct irq_data *data)
851 {
852         struct irq_info *info = info_for_irq(data->irq);
853         evtchn_port_t evtchn = info ? info->evtchn : 0;
854         struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
855         int rc = 0;
856
857         if (!VALID_EVTCHN(evtchn))
858                 return;
859
860         event_handler_exit(info);
861
862         if (pirq_needs_eoi(data->irq)) {
863                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
864                 WARN_ON(rc);
865         }
866 }
867
868 static void mask_ack_pirq(struct irq_data *data)
869 {
870         disable_dynirq(data);
871         eoi_pirq(data);
872 }
873
874 static unsigned int __startup_pirq(unsigned int irq)
875 {
876         struct evtchn_bind_pirq bind_pirq;
877         struct irq_info *info = info_for_irq(irq);
878         evtchn_port_t evtchn = evtchn_from_irq(irq);
879         int rc;
880
881         BUG_ON(info->type != IRQT_PIRQ);
882
883         if (VALID_EVTCHN(evtchn))
884                 goto out;
885
886         bind_pirq.pirq = pirq_from_irq(irq);
887         /* NB. We are happy to share unless we are probing. */
888         bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
889                                         BIND_PIRQ__WILL_SHARE : 0;
890         rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
891         if (rc != 0) {
892                 pr_warn("Failed to obtain physical IRQ %d\n", irq);
893                 return 0;
894         }
895         evtchn = bind_pirq.port;
896
897         pirq_query_unmask(irq);
898
899         rc = set_evtchn_to_irq(evtchn, irq);
900         if (rc)
901                 goto err;
902
903         info->evtchn = evtchn;
904         bind_evtchn_to_cpu(evtchn, 0, false);
905
906         rc = xen_evtchn_port_setup(evtchn);
907         if (rc)
908                 goto err;
909
910 out:
911         do_unmask(info, EVT_MASK_REASON_EXPLICIT);
912
913         eoi_pirq(irq_get_irq_data(irq));
914
915         return 0;
916
917 err:
918         pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
919         xen_evtchn_close(evtchn);
920         return 0;
921 }
922
923 static unsigned int startup_pirq(struct irq_data *data)
924 {
925         return __startup_pirq(data->irq);
926 }
927
928 static void shutdown_pirq(struct irq_data *data)
929 {
930         unsigned int irq = data->irq;
931         struct irq_info *info = info_for_irq(irq);
932         evtchn_port_t evtchn = evtchn_from_irq(irq);
933
934         BUG_ON(info->type != IRQT_PIRQ);
935
936         if (!VALID_EVTCHN(evtchn))
937                 return;
938
939         do_mask(info, EVT_MASK_REASON_EXPLICIT);
940         xen_irq_info_cleanup(info);
941         xen_evtchn_close(evtchn);
942 }
943
944 static void enable_pirq(struct irq_data *data)
945 {
946         enable_dynirq(data);
947 }
948
949 static void disable_pirq(struct irq_data *data)
950 {
951         disable_dynirq(data);
952 }
953
954 int xen_irq_from_gsi(unsigned gsi)
955 {
956         struct irq_info *info;
957
958         list_for_each_entry(info, &xen_irq_list_head, list) {
959                 if (info->type != IRQT_PIRQ)
960                         continue;
961
962                 if (info->u.pirq.gsi == gsi)
963                         return info->irq;
964         }
965
966         return -1;
967 }
968 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
969
970 static void __unbind_from_irq(unsigned int irq)
971 {
972         evtchn_port_t evtchn = evtchn_from_irq(irq);
973         struct irq_info *info = info_for_irq(irq);
974
975         if (info->refcnt > 0) {
976                 info->refcnt--;
977                 if (info->refcnt != 0)
978                         return;
979         }
980
981         if (VALID_EVTCHN(evtchn)) {
982                 unsigned int cpu = cpu_from_irq(irq);
983                 struct xenbus_device *dev;
984
985                 switch (type_from_irq(irq)) {
986                 case IRQT_VIRQ:
987                         per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
988                         break;
989                 case IRQT_IPI:
990                         per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
991                         break;
992                 case IRQT_EVTCHN:
993                         dev = info->u.interdomain;
994                         if (dev)
995                                 atomic_dec(&dev->event_channels);
996                         break;
997                 default:
998                         break;
999                 }
1000
1001                 xen_irq_info_cleanup(info);
1002                 xen_evtchn_close(evtchn);
1003         }
1004
1005         xen_free_irq(irq);
1006 }
1007
1008 /*
1009  * Do not make any assumptions regarding the relationship between the
1010  * IRQ number returned here and the Xen pirq argument.
1011  *
1012  * Note: We don't assign an event channel until the irq has actually been started
1013  * up.  Return an existing irq if we've already got one for the gsi.
1014  *
1015  * Shareable implies level triggered, not shareable implies edge
1016  * triggered here.
1017  */
1018 int xen_bind_pirq_gsi_to_irq(unsigned gsi,
1019                              unsigned pirq, int shareable, char *name)
1020 {
1021         int irq;
1022         struct physdev_irq irq_op;
1023         int ret;
1024
1025         mutex_lock(&irq_mapping_update_lock);
1026
1027         irq = xen_irq_from_gsi(gsi);
1028         if (irq != -1) {
1029                 pr_info("%s: returning irq %d for gsi %u\n",
1030                         __func__, irq, gsi);
1031                 goto out;
1032         }
1033
1034         irq = xen_allocate_irq_gsi(gsi);
1035         if (irq < 0)
1036                 goto out;
1037
1038         irq_op.irq = irq;
1039         irq_op.vector = 0;
1040
1041         /* Only the privileged domain can do this. For a non-privileged
1042          * domain, the pcifront driver provides a PCI bus that makes this
1043          * call on its behalf in the privileged domain. */
1044         if (xen_initial_domain() &&
1045             HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1046                 xen_free_irq(irq);
1047                 irq = -ENOSPC;
1048                 goto out;
1049         }
1050
1051         ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
1052                                shareable ? PIRQ_SHAREABLE : 0);
1053         if (ret < 0) {
1054                 __unbind_from_irq(irq);
1055                 irq = ret;
1056                 goto out;
1057         }
1058
1059         pirq_query_unmask(irq);
1060         /* We try to use the handler with the appropriate semantic for the
1061          * type of interrupt: if the interrupt is an edge triggered
1062          * interrupt we use handle_edge_irq.
1063          *
1064          * On the other hand if the interrupt is level triggered we use
1065          * handle_fasteoi_irq like the native code does for this kind of
1066          * interrupts.
1067          *
1068          * Depending on the Xen version, pirq_needs_eoi might return true
1069          * not only for level triggered interrupts but for edge triggered
1070          * interrupts too. In any case Xen always honors the eoi mechanism,
1071          * not injecting any more pirqs of the same kind if the first one
1072          * hasn't received an eoi yet. Therefore using the fasteoi handler
1073          * is the right choice either way.
1074          */
1075         if (shareable)
1076                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1077                                 handle_fasteoi_irq, name);
1078         else
1079                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1080                                 handle_edge_irq, name);
1081
1082 out:
1083         mutex_unlock(&irq_mapping_update_lock);
1084
1085         return irq;
1086 }
1087
1088 #ifdef CONFIG_PCI_MSI
1089 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1090 {
1091         int rc;
1092         struct physdev_get_free_pirq op_get_free_pirq;
1093
1094         op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1095         rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1096
1097         WARN_ONCE(rc == -ENOSYS,
1098                   "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1099
1100         return rc ? -1 : op_get_free_pirq.pirq;
1101 }
1102
1103 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
1104                              int pirq, int nvec, const char *name, domid_t domid)
1105 {
1106         int i, irq, ret;
1107
1108         mutex_lock(&irq_mapping_update_lock);
1109
1110         irq = xen_allocate_irqs_dynamic(nvec);
1111         if (irq < 0)
1112                 goto out;
1113
1114         for (i = 0; i < nvec; i++) {
1115                 irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
1116
1117                 ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
1118                                               i == 0 ? 0 : PIRQ_MSI_GROUP);
1119                 if (ret < 0)
1120                         goto error_irq;
1121         }
1122
1123         ret = irq_set_msi_desc(irq, msidesc);
1124         if (ret < 0)
1125                 goto error_irq;
1126 out:
1127         mutex_unlock(&irq_mapping_update_lock);
1128         return irq;
1129 error_irq:
1130         while (nvec--)
1131                 __unbind_from_irq(irq + nvec);
1132         mutex_unlock(&irq_mapping_update_lock);
1133         return ret;
1134 }
1135 #endif
1136
1137 int xen_destroy_irq(int irq)
1138 {
1139         struct physdev_unmap_pirq unmap_irq;
1140         struct irq_info *info = info_for_irq(irq);
1141         int rc = -ENOENT;
1142
1143         mutex_lock(&irq_mapping_update_lock);
1144
1145         /*
1146          * When trying to remove a vector that is not the first one of
1147          * its MSI group, skip the PIRQ unmap; the unmap is only done
1148          * for the first vector of the group.
1149          */
1150         if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
1151                 unmap_irq.pirq = info->u.pirq.pirq;
1152                 unmap_irq.domid = info->u.pirq.domid;
1153                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1154                 /* If another domain quits without making the pci_disable_msix
1155                  * call, the Xen hypervisor takes care of freeing the PIRQs
1156                  * (free_domain_pirqs).
1157                  */
1158                 if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
1159                         pr_info("domain %d does not have %d anymore\n",
1160                                 info->u.pirq.domid, info->u.pirq.pirq);
1161                 else if (rc) {
1162                         pr_warn("unmap irq failed %d\n", rc);
1163                         goto out;
1164                 }
1165         }
1166
1167         xen_free_irq(irq);
1168
1169 out:
1170         mutex_unlock(&irq_mapping_update_lock);
1171         return rc;
1172 }
1173
1174 int xen_irq_from_pirq(unsigned pirq)
1175 {
1176         int irq;
1177
1178         struct irq_info *info;
1179
1180         mutex_lock(&irq_mapping_update_lock);
1181
1182         list_for_each_entry(info, &xen_irq_list_head, list) {
1183                 if (info->type != IRQT_PIRQ)
1184                         continue;
1185                 irq = info->irq;
1186                 if (info->u.pirq.pirq == pirq)
1187                         goto out;
1188         }
1189         irq = -1;
1190 out:
1191         mutex_unlock(&irq_mapping_update_lock);
1192
1193         return irq;
1194 }
1195
1196
1197 int xen_pirq_from_irq(unsigned irq)
1198 {
1199         return pirq_from_irq(irq);
1200 }
1201 EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1202
1203 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
1204                                    struct xenbus_device *dev)
1205 {
1206         int irq;
1207         int ret;
1208
1209         if (evtchn >= xen_evtchn_max_channels())
1210                 return -ENOMEM;
1211
1212         mutex_lock(&irq_mapping_update_lock);
1213
1214         irq = get_evtchn_to_irq(evtchn);
1215
1216         if (irq == -1) {
1217                 irq = xen_allocate_irq_dynamic();
1218                 if (irq < 0)
1219                         goto out;
1220
1221                 irq_set_chip_and_handler_name(irq, chip,
1222                                               handle_edge_irq, "event");
1223
1224                 ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
1225                 if (ret < 0) {
1226                         __unbind_from_irq(irq);
1227                         irq = ret;
1228                         goto out;
1229                 }
1230                 /*
1231                  * New interdomain events are initially bound to vCPU0. This
1232                  * is required to set up the event channel in the first
1233                  * place and is also important for UP guests because the
1234                  * affinity setting is not invoked on them so nothing would
1235                  * bind the channel.
1236                  */
1237                 bind_evtchn_to_cpu(evtchn, 0, false);
1238         } else {
1239                 struct irq_info *info = info_for_irq(irq);
1240                 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
1241         }
1242
1243 out:
1244         mutex_unlock(&irq_mapping_update_lock);
1245
1246         return irq;
1247 }
1248
1249 int bind_evtchn_to_irq(evtchn_port_t evtchn)
1250 {
1251         return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
1252 }
1253 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1254
1255 int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
1256 {
1257         return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
1258 }
1259 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
1260
1261 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1262 {
1263         struct evtchn_bind_ipi bind_ipi;
1264         evtchn_port_t evtchn;
1265         int ret, irq;
1266
1267         mutex_lock(&irq_mapping_update_lock);
1268
1269         irq = per_cpu(ipi_to_irq, cpu)[ipi];
1270
1271         if (irq == -1) {
1272                 irq = xen_allocate_irq_dynamic();
1273                 if (irq < 0)
1274                         goto out;
1275
1276                 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1277                                               handle_percpu_irq, "ipi");
1278
1279                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
1280                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1281                                                 &bind_ipi) != 0)
1282                         BUG();
1283                 evtchn = bind_ipi.port;
1284
1285                 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1286                 if (ret < 0) {
1287                         __unbind_from_irq(irq);
1288                         irq = ret;
1289                         goto out;
1290                 }
1291                 /*
1292                  * Force the affinity mask to the target CPU so proc shows
1293                  * the correct target.
1294                  */
1295                 bind_evtchn_to_cpu(evtchn, cpu, true);
1296         } else {
1297                 struct irq_info *info = info_for_irq(irq);
1298                 WARN_ON(info == NULL || info->type != IRQT_IPI);
1299         }
1300
1301  out:
1302         mutex_unlock(&irq_mapping_update_lock);
1303         return irq;
1304 }
1305
1306 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
1307                                                evtchn_port_t remote_port,
1308                                                struct irq_chip *chip)
1309 {
1310         struct evtchn_bind_interdomain bind_interdomain;
1311         int err;
1312
1313         bind_interdomain.remote_dom  = dev->otherend_id;
1314         bind_interdomain.remote_port = remote_port;
1315
1316         err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1317                                           &bind_interdomain);
1318
1319         return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1320                                                chip, dev);
1321 }
1322
1323 int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
1324                                            evtchn_port_t remote_port)
1325 {
1326         return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
1327                                                    &xen_lateeoi_chip);
1328 }
1329 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1330
1331 static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1332 {
1333         struct evtchn_status status;
1334         evtchn_port_t port;
1335         int rc = -ENOENT;
1336
1337         memset(&status, 0, sizeof(status));
1338         for (port = 0; port < xen_evtchn_max_channels(); port++) {
1339                 status.dom = DOMID_SELF;
1340                 status.port = port;
1341                 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1342                 if (rc < 0)
1343                         continue;
1344                 if (status.status != EVTCHNSTAT_virq)
1345                         continue;
1346                 if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1347                         *evtchn = port;
1348                         break;
1349                 }
1350         }
1351         return rc;
1352 }
1353
1354 /**
1355  * xen_evtchn_nr_channels - number of usable event channel ports
1356  *
1357  * This may be less than the maximum supported by the current
1358  * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1359  * supported.
1360  */
1361 unsigned xen_evtchn_nr_channels(void)
1362 {
1363         return evtchn_ops->nr_channels();
1364 }
1365 EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1366
1367 int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1368 {
1369         struct evtchn_bind_virq bind_virq;
1370         evtchn_port_t evtchn = 0;
1371         int irq, ret;
1372
1373         mutex_lock(&irq_mapping_update_lock);
1374
1375         irq = per_cpu(virq_to_irq, cpu)[virq];
1376
1377         if (irq == -1) {
1378                 irq = xen_allocate_irq_dynamic();
1379                 if (irq < 0)
1380                         goto out;
1381
1382                 if (percpu)
1383                         irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1384                                                       handle_percpu_irq, "virq");
1385                 else
1386                         irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
1387                                                       handle_edge_irq, "virq");
1388
1389                 bind_virq.virq = virq;
1390                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1391                 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1392                                                 &bind_virq);
1393                 if (ret == 0)
1394                         evtchn = bind_virq.port;
1395                 else {
1396                         if (ret == -EEXIST)
1397                                 ret = find_virq(virq, cpu, &evtchn);
1398                         BUG_ON(ret < 0);
1399                 }
1400
1401                 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1402                 if (ret < 0) {
1403                         __unbind_from_irq(irq);
1404                         irq = ret;
1405                         goto out;
1406                 }
1407
1408                 /*
1409                  * Force the affinity mask for percpu interrupts so proc
1410                  * shows the correct target.
1411                  */
1412                 bind_evtchn_to_cpu(evtchn, cpu, percpu);
1413         } else {
1414                 struct irq_info *info = info_for_irq(irq);
1415                 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1416         }
1417
1418 out:
1419         mutex_unlock(&irq_mapping_update_lock);
1420
1421         return irq;
1422 }
1423
1424 static void unbind_from_irq(unsigned int irq)
1425 {
1426         mutex_lock(&irq_mapping_update_lock);
1427         __unbind_from_irq(irq);
1428         mutex_unlock(&irq_mapping_update_lock);
1429 }
1430
1431 static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1432                                           irq_handler_t handler,
1433                                           unsigned long irqflags,
1434                                           const char *devname, void *dev_id,
1435                                           struct irq_chip *chip)
1436 {
1437         int irq, retval;
1438
1439         irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
1440         if (irq < 0)
1441                 return irq;
1442         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1443         if (retval != 0) {
1444                 unbind_from_irq(irq);
1445                 return retval;
1446         }
1447
1448         return irq;
1449 }
1450
1451 int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1452                               irq_handler_t handler,
1453                               unsigned long irqflags,
1454                               const char *devname, void *dev_id)
1455 {
1456         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1457                                               devname, dev_id,
1458                                               &xen_dynamic_chip);
1459 }
1460 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
1461
1462 int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1463                                       irq_handler_t handler,
1464                                       unsigned long irqflags,
1465                                       const char *devname, void *dev_id)
1466 {
1467         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1468                                               devname, dev_id,
1469                                               &xen_lateeoi_chip);
1470 }
1471 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1472
1473 static int bind_interdomain_evtchn_to_irqhandler_chip(
1474                 struct xenbus_device *dev, evtchn_port_t remote_port,
1475                 irq_handler_t handler, unsigned long irqflags,
1476                 const char *devname, void *dev_id, struct irq_chip *chip)
1477 {
1478         int irq, retval;
1479
1480         irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
1481         if (irq < 0)
1482                 return irq;
1483
1484         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1485         if (retval != 0) {
1486                 unbind_from_irq(irq);
1487                 return retval;
1488         }
1489
1490         return irq;
1491 }
1492
1493 int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
1494                                                   evtchn_port_t remote_port,
1495                                                   irq_handler_t handler,
1496                                                   unsigned long irqflags,
1497                                                   const char *devname,
1498                                                   void *dev_id)
1499 {
1500         return bind_interdomain_evtchn_to_irqhandler_chip(dev,
1501                                 remote_port, handler, irqflags, devname,
1502                                 dev_id, &xen_lateeoi_chip);
1503 }
1504 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
1505
1506 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1507                             irq_handler_t handler,
1508                             unsigned long irqflags, const char *devname, void *dev_id)
1509 {
1510         int irq, retval;
1511
1512         irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1513         if (irq < 0)
1514                 return irq;
1515         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1516         if (retval != 0) {
1517                 unbind_from_irq(irq);
1518                 return retval;
1519         }
1520
1521         return irq;
1522 }
1523 EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
1524
1525 int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1526                            unsigned int cpu,
1527                            irq_handler_t handler,
1528                            unsigned long irqflags,
1529                            const char *devname,
1530                            void *dev_id)
1531 {
1532         int irq, retval;
1533
1534         irq = bind_ipi_to_irq(ipi, cpu);
1535         if (irq < 0)
1536                 return irq;
1537
1538         irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1539         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1540         if (retval != 0) {
1541                 unbind_from_irq(irq);
1542                 return retval;
1543         }
1544
1545         return irq;
1546 }
1547
1548 void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1549 {
1550         struct irq_info *info = info_for_irq(irq);
1551
1552         if (WARN_ON(!info))
1553                 return;
1554         free_irq(irq, dev_id);
1555         unbind_from_irq(irq);
1556 }
1557 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1558
1559 /**
1560  * xen_set_irq_priority() - set an event channel priority.
1561  * @irq: irq bound to an event channel.
1562  * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1563  */
1564 int xen_set_irq_priority(unsigned irq, unsigned priority)
1565 {
1566         struct evtchn_set_priority set_priority;
1567
1568         set_priority.port = evtchn_from_irq(irq);
1569         set_priority.priority = priority;
1570
1571         return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1572                                            &set_priority);
1573 }
1574 EXPORT_SYMBOL_GPL(xen_set_irq_priority);
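/*
 * Usage sketch (handler name and flags are simplified; setting a priority is
 * only expected to take effect with the FIFO event channel ABI): a caller
 * such as the Xen clockevent code can raise the priority of its per-cpu
 * timer VIRQ like this:
 *
 *	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, my_timer_handler,
 *				      IRQF_PERCPU | IRQF_TIMER, "timer", NULL);
 *	if (irq >= 0)
 *		xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
 */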
1575
1576 int evtchn_make_refcounted(evtchn_port_t evtchn)
1577 {
1578         int irq = get_evtchn_to_irq(evtchn);
1579         struct irq_info *info;
1580
1581         if (irq == -1)
1582                 return -ENOENT;
1583
1584         info = info_for_irq(irq);
1585
1586         if (!info)
1587                 return -ENOENT;
1588
1589         WARN_ON(info->refcnt != -1);
1590
1591         info->refcnt = 1;
1592
1593         return 0;
1594 }
1595 EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1596
1597 int evtchn_get(evtchn_port_t evtchn)
1598 {
1599         int irq;
1600         struct irq_info *info;
1601         int err = -ENOENT;
1602
1603         if (evtchn >= xen_evtchn_max_channels())
1604                 return -EINVAL;
1605
1606         mutex_lock(&irq_mapping_update_lock);
1607
1608         irq = get_evtchn_to_irq(evtchn);
1609         if (irq == -1)
1610                 goto done;
1611
1612         info = info_for_irq(irq);
1613
1614         if (!info)
1615                 goto done;
1616
1617         err = -EINVAL;
1618         if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1619                 goto done;
1620
1621         info->refcnt++;
1622         err = 0;
1623  done:
1624         mutex_unlock(&irq_mapping_update_lock);
1625
1626         return err;
1627 }
1628 EXPORT_SYMBOL_GPL(evtchn_get);
1629
1630 void evtchn_put(evtchn_port_t evtchn)
1631 {
1632         int irq = get_evtchn_to_irq(evtchn);
1633         if (WARN_ON(irq == -1))
1634                 return;
1635         unbind_from_irq(irq);
1636 }
1637 EXPORT_SYMBOL_GPL(evtchn_put);
1638
1639 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1640 {
1641         int irq;
1642
1643 #ifdef CONFIG_X86
1644         if (unlikely(vector == XEN_NMI_VECTOR)) {
1645                 int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1646                                              NULL);
1647                 if (rc < 0)
1648                         printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1649                 return;
1650         }
1651 #endif
1652         irq = per_cpu(ipi_to_irq, cpu)[vector];
1653         BUG_ON(irq < 0);
1654         notify_remote_via_irq(irq);
1655 }
1656
1657 struct evtchn_loop_ctrl {
1658         ktime_t timeout;
1659         unsigned count;
1660         bool defer_eoi;
1661 };
1662
1663 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1664 {
1665         int irq;
1666         struct irq_info *info;
1667         struct xenbus_device *dev;
1668
1669         irq = get_evtchn_to_irq(port);
1670         if (irq == -1)
1671                 return;
1672
1673         /*
1674          * Check for timeout every 256 events.
1675          * We are setting the timeout value only after the first 256
1676          * events in order to not hurt the common case of few loop
1677          * iterations. The 256 is basically an arbitrary value.
1678          *
1679          * In case we are hitting the timeout we need to defer all further
1680          * EOIs in order to ensure we leave the event handling loop sooner
1681          * rather than later.
1682          */
1683         if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1684                 ktime_t kt = ktime_get();
1685
1686                 if (!ctrl->timeout) {
1687                         kt = ktime_add_ms(kt,
1688                                           jiffies_to_msecs(event_loop_timeout));
1689                         ctrl->timeout = kt;
1690                 } else if (kt > ctrl->timeout) {
1691                         ctrl->defer_eoi = true;
1692                 }
1693         }
1694
1695         info = info_for_irq(irq);
1696         if (xchg_acquire(&info->is_active, 1))
1697                 return;
1698
1699         dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
1700         if (dev)
1701                 atomic_inc(&dev->events);
1702
1703         if (ctrl->defer_eoi) {
1704                 info->eoi_cpu = smp_processor_id();
1705                 info->irq_epoch = __this_cpu_read(irq_epoch);
1706                 info->eoi_time = get_jiffies_64() + event_eoi_delay;
1707         }
1708
1709         generic_handle_irq(irq);
1710 }
1711
1712 int xen_evtchn_do_upcall(void)
1713 {
1714         struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1715         int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE;
1716         int cpu = smp_processor_id();
1717         struct evtchn_loop_ctrl ctrl = { 0 };
1718
1719         /*
1720          * When closing an event channel the associated IRQ must not be freed
1721          * until all cpus have left the event handling loop. This is ensured
1722          * by taking the rcu_read_lock() while handling events, as freeing of
1723          * the IRQ is handled via queue_rcu_work() _after_ closing the event
1724          * channel.
1725          */
1726         rcu_read_lock();
1727
1728         do {
1729                 vcpu_info->evtchn_upcall_pending = 0;
1730
1731                 xen_evtchn_handle_events(cpu, &ctrl);
1732
1733                 BUG_ON(!irqs_disabled());
1734
1735                 virt_rmb(); /* Hypervisor can set upcall pending. */
1736
1737         } while (vcpu_info->evtchn_upcall_pending);
1738
1739         rcu_read_unlock();
1740
1741         /*
1742          * Increment irq_epoch only now, so that EOIs are deferred only
1743          * for xen_irq_lateeoi() invocations occurring from inside the
1744          * loop above.
1745          */
1746         __this_cpu_inc(irq_epoch);
1747
1748         return ret;
1749 }
1750 EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall);
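
/*
 * Descriptive note on the delivery path (no new behaviour): the hypervisor
 * sets vcpu_info->evtchn_upcall_pending and raises the upcall, the per-arch
 * callback ends up in xen_evtchn_do_upcall(), which scans the pending ports
 * via xen_evtchn_handle_events() and hands each one to handle_irq_for_port(),
 * finally entering the normal Linux flow through generic_handle_irq().
 */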
1751
1752 /* Rebind a new event channel to an existing irq. */
1753 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1754 {
1755         struct irq_info *info = info_for_irq(irq);
1756
1757         if (WARN_ON(!info))
1758                 return;
1759
1760         /* Make sure the irq is masked, since the new event channel
1761            will also be masked. */
1762         disable_irq(irq);
1763
1764         mutex_lock(&irq_mapping_update_lock);
1765
1766         /* After resume the irq<->evtchn mappings are all cleared out */
1767         BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1768         /* Expect irq to have been bound before,
1769            so there should be a proper type */
1770         BUG_ON(info->type == IRQT_UNBOUND);
1771
1772         (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
1773
1774         mutex_unlock(&irq_mapping_update_lock);
1775
1776         bind_evtchn_to_cpu(evtchn, info->cpu, false);
1777
1778         /* Unmask the event channel. */
1779         enable_irq(irq);
1780 }
1781
1782 /* Rebind an evtchn so that it gets delivered to a specific cpu */
1783 static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
1784 {
1785         struct evtchn_bind_vcpu bind_vcpu;
1786         evtchn_port_t evtchn = info ? info->evtchn : 0;
1787
1788         if (!VALID_EVTCHN(evtchn))
1789                 return -1;
1790
1791         if (!xen_support_evtchn_rebind())
1792                 return -1;
1793
1794         /* Send future instances of this interrupt to the target vcpu. */
1795         bind_vcpu.port = evtchn;
1796         bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1797
1798         /*
1799          * Mask the event while changing the VCPU binding to prevent
1800          * it being delivered on an unexpected VCPU.
1801          */
1802         do_mask(info, EVT_MASK_REASON_TEMPORARY);
1803
1804         /*
1805          * If this fails, it usually just indicates that we're dealing with a
1806          * virq or IPI channel, which don't actually need to be rebound. Ignore
1807          * it, but don't do the xenlinux-level rebind in that case.
1808          */
1809         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1810                 bind_evtchn_to_cpu(evtchn, tcpu, false);
1811
1812         do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1813
1814         return 0;
1815 }
1816
1817 /*
1818  * Find the CPU within @dest mask which has the least number of channels
1819  * assigned. This is not precise as the per cpu counts can be modified
1820  * concurrently.
1821  */
1822 static unsigned int select_target_cpu(const struct cpumask *dest)
1823 {
1824         unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1825
1826         for_each_cpu_and(cpu, dest, cpu_online_mask) {
1827                 unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1828
1829                 if (curch < minch) {
1830                         minch = curch;
1831                         best_cpu = cpu;
1832                 }
1833         }
1834
1835         /*
1836          * Catch the unlikely case that dest contains no online CPUs. The
1837          * fallback with cpu_online_mask cannot recurse again.
1838          */
1839         if (best_cpu == UINT_MAX)
1840                 return select_target_cpu(cpu_online_mask);
1841
1842         return best_cpu;
1843 }
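
/*
 * Worked example for select_target_cpu() (numbers purely illustrative): with
 * dest = {1, 2}, all CPUs online and channels_on_cpu = {12, 3, 7}, CPU 1 is
 * returned as it has the fewest bound channels; if no CPU in dest were
 * online, the search would be repeated over cpu_online_mask instead.
 */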
1844
1845 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1846                             bool force)
1847 {
1848         unsigned int tcpu = select_target_cpu(dest);
1849         int ret;
1850
1851         ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
1852         if (!ret)
1853                 irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1854
1855         return ret;
1856 }
1857
1858 static void enable_dynirq(struct irq_data *data)
1859 {
1860         struct irq_info *info = info_for_irq(data->irq);
1861         evtchn_port_t evtchn = info ? info->evtchn : 0;
1862
1863         if (VALID_EVTCHN(evtchn))
1864                 do_unmask(info, EVT_MASK_REASON_EXPLICIT);
1865 }
1866
1867 static void disable_dynirq(struct irq_data *data)
1868 {
1869         struct irq_info *info = info_for_irq(data->irq);
1870         evtchn_port_t evtchn = info ? info->evtchn : 0;
1871
1872         if (VALID_EVTCHN(evtchn))
1873                 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1874 }
1875
1876 static void ack_dynirq(struct irq_data *data)
1877 {
1878         struct irq_info *info = info_for_irq(data->irq);
1879         evtchn_port_t evtchn = info ? info->evtchn : 0;
1880
1881         if (VALID_EVTCHN(evtchn))
1882                 event_handler_exit(info);
1883 }
1884
1885 static void mask_ack_dynirq(struct irq_data *data)
1886 {
1887         disable_dynirq(data);
1888         ack_dynirq(data);
1889 }
1890
1891 static void lateeoi_ack_dynirq(struct irq_data *data)
1892 {
1893         struct irq_info *info = info_for_irq(data->irq);
1894         evtchn_port_t evtchn = info ? info->evtchn : 0;
1895
1896         if (VALID_EVTCHN(evtchn)) {
1897                 do_mask(info, EVT_MASK_REASON_EOI_PENDING);
1898                 /*
1899                  * Don't call event_handler_exit().
1900                  * Need to keep is_active non-zero in order to ignore re-raised
1901                  * events after cpu affinity changes while a lateeoi is pending.
1902                  */
1903                 clear_evtchn(evtchn);
1904         }
1905 }
1906
1907 static void lateeoi_mask_ack_dynirq(struct irq_data *data)
1908 {
1909         struct irq_info *info = info_for_irq(data->irq);
1910         evtchn_port_t evtchn = info ? info->evtchn : 0;
1911
1912         if (VALID_EVTCHN(evtchn)) {
1913                 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1914                 event_handler_exit(info);
1915         }
1916 }
1917
1918 static int retrigger_dynirq(struct irq_data *data)
1919 {
1920         struct irq_info *info = info_for_irq(data->irq);
1921         evtchn_port_t evtchn = info ? info->evtchn : 0;
1922
1923         if (!VALID_EVTCHN(evtchn))
1924                 return 0;
1925
1926         do_mask(info, EVT_MASK_REASON_TEMPORARY);
1927         set_evtchn(evtchn);
1928         do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1929
1930         return 1;
1931 }
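
/*
 * Note: retriggering works by setting the pending bit by hand while the
 * channel is temporarily masked; dropping the temporary mask reason then
 * lets Xen deliver the event again through the normal upcall path.
 */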
1932
1933 static void restore_pirqs(void)
1934 {
1935         int pirq, rc, irq, gsi;
1936         struct physdev_map_pirq map_irq;
1937         struct irq_info *info;
1938
1939         list_for_each_entry(info, &xen_irq_list_head, list) {
1940                 if (info->type != IRQT_PIRQ)
1941                         continue;
1942
1943                 pirq = info->u.pirq.pirq;
1944                 gsi = info->u.pirq.gsi;
1945                 irq = info->irq;
1946
1947                 /* Save/restore of PT devices doesn't work, so at this point the
1948                  * only devices present are GSI-based emulated devices. */
1949                 if (!gsi)
1950                         continue;
1951
1952                 map_irq.domid = DOMID_SELF;
1953                 map_irq.type = MAP_PIRQ_TYPE_GSI;
1954                 map_irq.index = gsi;
1955                 map_irq.pirq = pirq;
1956
1957                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1958                 if (rc) {
1959                         pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1960                                 gsi, irq, pirq, rc);
1961                         xen_free_irq(irq);
1962                         continue;
1963                 }
1964
1965                 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1966
1967                 __startup_pirq(irq);
1968         }
1969 }
1970
1971 static void restore_cpu_virqs(unsigned int cpu)
1972 {
1973         struct evtchn_bind_virq bind_virq;
1974         evtchn_port_t evtchn;
1975         int virq, irq;
1976
1977         for (virq = 0; virq < NR_VIRQS; virq++) {
1978                 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1979                         continue;
1980
1981                 BUG_ON(virq_from_irq(irq) != virq);
1982
1983                 /* Get a new binding from Xen. */
1984                 bind_virq.virq = virq;
1985                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1986                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1987                                                 &bind_virq) != 0)
1988                         BUG();
1989                 evtchn = bind_virq.port;
1990
1991                 /* Record the new mapping. */
1992                 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1993                 /* The affinity mask is still valid */
1994                 bind_evtchn_to_cpu(evtchn, cpu, false);
1995         }
1996 }
1997
1998 static void restore_cpu_ipis(unsigned int cpu)
1999 {
2000         struct evtchn_bind_ipi bind_ipi;
2001         evtchn_port_t evtchn;
2002         int ipi, irq;
2003
2004         for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
2005                 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
2006                         continue;
2007
2008                 BUG_ON(ipi_from_irq(irq) != ipi);
2009
2010                 /* Get a new binding from Xen. */
2011                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
2012                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
2013                                                 &bind_ipi) != 0)
2014                         BUG();
2015                 evtchn = bind_ipi.port;
2016
2017                 /* Record the new mapping. */
2018                 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
2019                 /* The affinity mask is still valid */
2020                 bind_evtchn_to_cpu(evtchn, cpu, false);
2021         }
2022 }
2023
2024 /* Clear an irq's pending state, in preparation for polling on it */
2025 void xen_clear_irq_pending(int irq)
2026 {
2027         struct irq_info *info = info_for_irq(irq);
2028         evtchn_port_t evtchn = info ? info->evtchn : 0;
2029
2030         if (VALID_EVTCHN(evtchn))
2031                 event_handler_exit(info);
2032 }
2033 EXPORT_SYMBOL(xen_clear_irq_pending);
2034 void xen_set_irq_pending(int irq)
2035 {
2036         evtchn_port_t evtchn = evtchn_from_irq(irq);
2037
2038         if (VALID_EVTCHN(evtchn))
2039                 set_evtchn(evtchn);
2040 }
2041
2042 bool xen_test_irq_pending(int irq)
2043 {
2044         evtchn_port_t evtchn = evtchn_from_irq(irq);
2045         bool ret = false;
2046
2047         if (VALID_EVTCHN(evtchn))
2048                 ret = test_evtchn(evtchn);
2049
2050         return ret;
2051 }
2052
2053 /* Poll waiting for an irq to become pending, with a timeout.  In the usual
2054  * case, the irq will be disabled so it won't deliver an interrupt. */
2055 void xen_poll_irq_timeout(int irq, u64 timeout)
2056 {
2057         evtchn_port_t evtchn = evtchn_from_irq(irq);
2058
2059         if (VALID_EVTCHN(evtchn)) {
2060                 struct sched_poll poll;
2061
2062                 poll.nr_ports = 1;
2063                 poll.timeout = timeout;
2064                 set_xen_guest_handle(poll.ports, &evtchn);
2065
2066                 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2067                         BUG();
2068         }
2069 }
2070 EXPORT_SYMBOL(xen_poll_irq_timeout);
2071 /* Poll waiting for an irq to become pending.  In the usual case, the
2072  * irq will be disabled so it won't deliver an interrupt. */
2073 void xen_poll_irq(int irq)
2074 {
2075         xen_poll_irq_timeout(irq, 0 /* no timeout */);
2076 }
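
/*
 * Typical polling pattern (sketch only; "lock_byte" and "busy" are
 * placeholders, loosely modelled on how the Xen spinlock code uses these
 * helpers):
 *
 *	xen_clear_irq_pending(irq);
 *	barrier();
 *	if (READ_ONCE(*lock_byte) == busy)
 *		xen_poll_irq(irq);	(blocks in Xen until the irq fires)
 */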
2077
2078 /* Check whether the IRQ line is shared with other guests. */
2079 int xen_test_irq_shared(int irq)
2080 {
2081         struct irq_info *info = info_for_irq(irq);
2082         struct physdev_irq_status_query irq_status;
2083
2084         if (WARN_ON(!info))
2085                 return -ENOENT;
2086
2087         irq_status.irq = info->u.pirq.pirq;
2088
2089         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2090                 return 0;
2091         return !(irq_status.flags & XENIRQSTAT_shared);
2092 }
2093 EXPORT_SYMBOL_GPL(xen_test_irq_shared);
2094
2095 void xen_irq_resume(void)
2096 {
2097         unsigned int cpu;
2098         struct irq_info *info;
2099
2100         /* New event-channel space is not 'live' yet. */
2101         xen_evtchn_resume();
2102
2103         /* No IRQ <-> event-channel mappings. */
2104         list_for_each_entry(info, &xen_irq_list_head, list) {
2105                 /* Zap event-channel binding */
2106                 info->evtchn = 0;
2107                 /* Adjust accounting */
2108                 channels_on_cpu_dec(info);
2109         }
2110
2111         clear_evtchn_to_irq_all();
2112
2113         for_each_possible_cpu(cpu) {
2114                 restore_cpu_virqs(cpu);
2115                 restore_cpu_ipis(cpu);
2116         }
2117
2118         restore_pirqs();
2119 }
2120
2121 static struct irq_chip xen_dynamic_chip __read_mostly = {
2122         .name                   = "xen-dyn",
2123
2124         .irq_disable            = disable_dynirq,
2125         .irq_mask               = disable_dynirq,
2126         .irq_unmask             = enable_dynirq,
2127
2128         .irq_ack                = ack_dynirq,
2129         .irq_mask_ack           = mask_ack_dynirq,
2130
2131         .irq_set_affinity       = set_affinity_irq,
2132         .irq_retrigger          = retrigger_dynirq,
2133 };
2134
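/*
 * Note on the two dynamic chips: xen_dynamic_chip acks an event by calling
 * event_handler_exit() immediately, while xen_lateeoi_chip below keeps the
 * channel masked and is_active set until the handler signals completion via
 * xen_irq_lateeoi() (see lateeoi_ack_dynirq() above).
 */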
2135 static struct irq_chip xen_lateeoi_chip __read_mostly = {
2136         /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2137         .name                   = "xen-dyn-lateeoi",
2138
2139         .irq_disable            = disable_dynirq,
2140         .irq_mask               = disable_dynirq,
2141         .irq_unmask             = enable_dynirq,
2142
2143         .irq_ack                = lateeoi_ack_dynirq,
2144         .irq_mask_ack           = lateeoi_mask_ack_dynirq,
2145
2146         .irq_set_affinity       = set_affinity_irq,
2147         .irq_retrigger          = retrigger_dynirq,
2148 };
2149
2150 static struct irq_chip xen_pirq_chip __read_mostly = {
2151         .name                   = "xen-pirq",
2152
2153         .irq_startup            = startup_pirq,
2154         .irq_shutdown           = shutdown_pirq,
2155         .irq_enable             = enable_pirq,
2156         .irq_disable            = disable_pirq,
2157
2158         .irq_mask               = disable_dynirq,
2159         .irq_unmask             = enable_dynirq,
2160
2161         .irq_ack                = eoi_pirq,
2162         .irq_eoi                = eoi_pirq,
2163         .irq_mask_ack           = mask_ack_pirq,
2164
2165         .irq_set_affinity       = set_affinity_irq,
2166
2167         .irq_retrigger          = retrigger_dynirq,
2168 };
2169
2170 static struct irq_chip xen_percpu_chip __read_mostly = {
2171         .name                   = "xen-percpu",
2172
2173         .irq_disable            = disable_dynirq,
2174         .irq_mask               = disable_dynirq,
2175         .irq_unmask             = enable_dynirq,
2176
2177         .irq_ack                = ack_dynirq,
2178 };
2179
2180 #ifdef CONFIG_X86
2181 #ifdef CONFIG_XEN_PVHVM
2182 /* Vector callbacks are better than PCI interrupts for receiving event
2183  * channel notifications because we can receive vector callbacks on any
2184  * vcpu and we don't need PCI support or APIC interactions. */
2185 void xen_setup_callback_vector(void)
2186 {
2187         uint64_t callback_via;
2188
2189         if (xen_have_vector_callback) {
2190                 callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
2191                 if (xen_set_callback_via(callback_via)) {
2192                         pr_err("Request for Xen HVM callback vector failed\n");
2193                         xen_have_vector_callback = false;
2194                 }
2195         }
2196 }
2197
2198 /*
2199  * Set up per-vCPU vector-type callbacks. If this setup is unavailable,
2200  * fall back to the global vector-type callback.
2201  */
2202 static __init void xen_init_setup_upcall_vector(void)
2203 {
2204         if (!xen_have_vector_callback)
2205                 return;
2206
2207         if ((cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) &&
2208             !xen_set_upcall_vector(0))
2209                 xen_percpu_upcall = true;
2210         else if (xen_feature(XENFEAT_hvm_callback_vector))
2211                 xen_setup_callback_vector();
2212         else
2213                 xen_have_vector_callback = false;
2214 }
2215
2216 int xen_set_upcall_vector(unsigned int cpu)
2217 {
2218         int rc;
2219         xen_hvm_evtchn_upcall_vector_t op = {
2220                 .vector = HYPERVISOR_CALLBACK_VECTOR,
2221                 .vcpu = per_cpu(xen_vcpu_id, cpu),
2222         };
2223
2224         rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op);
2225         if (rc)
2226                 return rc;
2227
2228         /* Trick the toolstack into thinking we are enlightened. */
2229         if (!cpu)
2230                 rc = xen_set_callback_via(1);
2231
2232         return rc;
2233 }
2234
2235 static __init void xen_alloc_callback_vector(void)
2236 {
2237         if (!xen_have_vector_callback)
2238                 return;
2239
2240         pr_info("Xen HVM callback vector for event delivery is enabled\n");
2241         alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2242 }
2243 #else
2244 void xen_setup_callback_vector(void) {}
2245 static inline void xen_init_setup_upcall_vector(void) {}
2246 int xen_set_upcall_vector(unsigned int cpu) { return 0; }
2247 static inline void xen_alloc_callback_vector(void) {}
2248 #endif /* CONFIG_XEN_PVHVM */
2249 #endif /* CONFIG_X86 */
2250
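/*
 * When fifo_events is set to false, FIFO ABI initialization is skipped and
 * xen_init_IRQ() below falls back to the 2-level event channel ABI; the flag
 * is also cleared if the FIFO initialization itself fails.
 */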
2251 bool xen_fifo_events = true;
2252 module_param_named(fifo_events, xen_fifo_events, bool, 0);
2253
2254 static int xen_evtchn_cpu_prepare(unsigned int cpu)
2255 {
2256         int ret = 0;
2257
2258         xen_cpu_init_eoi(cpu);
2259
2260         if (evtchn_ops->percpu_init)
2261                 ret = evtchn_ops->percpu_init(cpu);
2262
2263         return ret;
2264 }
2265
2266 static int xen_evtchn_cpu_dead(unsigned int cpu)
2267 {
2268         int ret = 0;
2269
2270         if (evtchn_ops->percpu_deinit)
2271                 ret = evtchn_ops->percpu_deinit(cpu);
2272
2273         return ret;
2274 }
2275
2276 void __init xen_init_IRQ(void)
2277 {
2278         int ret = -EINVAL;
2279         evtchn_port_t evtchn;
2280
2281         if (xen_fifo_events)
2282                 ret = xen_evtchn_fifo_init();
2283         if (ret < 0) {
2284                 xen_evtchn_2l_init();
2285                 xen_fifo_events = false;
2286         }
2287
2288         xen_cpu_init_eoi(smp_processor_id());
2289
2290         cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2291                                   "xen/evtchn:prepare",
2292                                   xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2293
2294         evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2295                                 sizeof(*evtchn_to_irq), GFP_KERNEL);
2296         BUG_ON(!evtchn_to_irq);
2297
2298         /* No event channels are 'live' right now. */
2299         for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2300                 mask_evtchn(evtchn);
2301
2302         pirq_needs_eoi = pirq_needs_eoi_flag;
2303
2304 #ifdef CONFIG_X86
2305         if (xen_pv_domain()) {
2306                 if (xen_initial_domain())
2307                         pci_xen_initial_domain();
2308         }
2309         xen_init_setup_upcall_vector();
2310         xen_alloc_callback_vector();
2311
2313         if (xen_hvm_domain()) {
2314                 native_init_IRQ();
2315                 /* pci_xen_hvm_init must be called after native_init_IRQ so that
2316                  * __acpi_register_gsi can point at the right function */
2317                 pci_xen_hvm_init();
2318         } else {
2319                 int rc;
2320                 struct physdev_pirq_eoi_gmfn eoi_gmfn;
2321
2322                 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2323                 eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2324                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2325                 if (rc != 0) {
2326                         free_page((unsigned long) pirq_eoi_map);
2327                         pirq_eoi_map = NULL;
2328                 } else
2329                         pirq_needs_eoi = pirq_check_eoi_map;
2330         }
2331 #endif
2332 }