virt/kvm/arm/vgic/vgic.c

   1 /*
   2  * Copyright (C) 2015, 2016 ARM Ltd.
   3  *
   4  * This program is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License version 2 as
   6  * published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11  * GNU General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public License
  14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15  */
  16
  17 #include <linux/interrupt.h>
  18 #include <linux/irq.h>
  19 #include <linux/kvm.h>
  20 #include <linux/kvm_host.h>
  21 #include <linux/list_sort.h>
  22 #include <linux/nospec.h>
  23
  24 #include <asm/kvm_hyp.h>
  25
  26 #include "vgic.h"
  27
  28 #define CREATE_TRACE_POINTS
  29 #include "trace.h"
  30
/*
 * Global VGIC state shared by this file. The structure is marked
 * __ro_after_init and its gicv3_cpuif static key starts out false.
 */
struct vgic_global kvm_vgic_global_state __ro_after_init = {
        .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};
  34
  35 /*
  36  * Locking order is always:
  37  * kvm->lock (mutex)
  38  *   its->cmd_lock (mutex)
  39  *     its->its_lock (mutex)
  40  *       vgic_cpu->ap_list_lock         must be taken with IRQs disabled
  41  *         kvm->lpi_list_lock           must be taken with IRQs disabled
  42  *           vgic_irq->irq_lock         must be taken with IRQs disabled
  43  *
  44  * As the ap_list_lock might be taken from the timer interrupt handler,
  45  * we have to disable IRQs before taking this lock and everything lower
  46  * than it.
  47  *
  48  * If you need to take multiple locks, always take the upper lock first,
  49  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
  50  * If you are already holding a lock and need to take a higher one, you
  51  * have to drop the lower ranking lock first and re-acquire it after having
  52  * taken the upper one.
  53  *
  54  * When taking more than one ap_list_lock at the same time, always take the
  55  * lowest numbered VCPU's ap_list_lock first, so:
  56  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
  57  *     spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
  58  *     spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
  59  *
  60  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
  61  * to use the spin_lock_irqsave/spin_unlock_irqrestore versions of outer
  62  * spinlocks for any lock that may be taken while injecting an interrupt.
  63  */
  64
  65 /*
  66  * Iterate over the VM's list of mapped LPIs to find the one with a
  67  * matching interrupt ID and return a reference to the IRQ structure.
  68  */
  69 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
  70 {
  71         struct vgic_dist *dist = &kvm->arch.vgic;
  72         struct vgic_irq *irq = NULL;
  73         unsigned long flags;
  74
  75         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
  76
  77         list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
  78                 if (irq->intid != intid)
  79                         continue;
  80
  81                 /*
  82                  * This increases the refcount, the caller is expected to
  83                  * call vgic_put_irq() later once it's finished with the IRQ.
  84                  */
  85                 vgic_get_irq_kref(irq);
  86                 goto out_unlock;
  87         }
  88         irq = NULL;
  89
  90 out_unlock:
  91         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  92
  93         return irq;
  94 }
  95
  96 /*
  97  * This looks up the virtual interrupt ID to get the corresponding
  98  * struct vgic_irq. It also increases the refcount, so any caller is expected
  99  * to call vgic_put_irq() once it's finished with this IRQ.
 100  */
 101 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 102                               u32 intid)
 103 {
 104         /* SGIs and PPIs */
 105         if (intid <= VGIC_MAX_PRIVATE) {
 106                 intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
 107                 return &vcpu->arch.vgic_cpu.private_irqs[intid];
 108         }
 109
 110         /* SPIs */
 111         if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
 112                 intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
 113                 return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
 114         }
 115
 116         /* LPIs */
 117         if (intid >= VGIC_MIN_LPI)
 118                 return vgic_get_lpi(kvm, intid);
 119
 120         WARN(1, "Looking up struct vgic_irq for reserved INTID");
 121         return NULL;
 122 }
 123
/*
 * Release callback handed to kref_put() by vgic_put_irq().
 *
 * We can't do anything in here, because we lack the kvm pointer to
 * lock and remove the item from the lpi_list. So we keep this function
 * empty and use the return value of kref_put() to trigger the freeing.
 */
static void vgic_irq_release(struct kref *ref)
{
}
 132
 133 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 134 {
 135         struct vgic_dist *dist = &kvm->arch.vgic;
 136         unsigned long flags;
 137
 138         if (irq->intid < VGIC_MIN_LPI)
 139                 return;
 140
 141         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
 142         if (!kref_put(&irq->refcount, vgic_irq_release)) {
 143                 raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 144                 return;
 145         };
 146
 147         list_del(&irq->lpi_list);
 148         dist->lpi_list_count--;
 149         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 150
 151         kfree(irq);
 152 }
 153
 154 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
 155 {
 156         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 157                                       IRQCHIP_STATE_PENDING,
 158                                       pending));
 159 }
 160
 161 bool vgic_get_phys_line_level(struct vgic_irq *irq)
 162 {
 163         bool line_level;
 164
 165         BUG_ON(!irq->hw);
 166
 167         if (irq->get_input_level)
 168                 return irq->get_input_level(irq->intid);
 169
 170         WARN_ON(irq_get_irqchip_state(irq->host_irq,
 171                                       IRQCHIP_STATE_PENDING,
 172                                       &line_level));
 173         return line_level;
 174 }
 175
 176 /* Set/Clear the physical active state */
 177 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
 178 {
 179
 180         BUG_ON(!irq->hw);
 181         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 182                                       IRQCHIP_STATE_ACTIVE,
 183                                       active));
 184 }
 185
 186 /**
 187  * kvm_vgic_target_oracle - compute the target vcpu for an irq
 188  *
 189  * @irq:        The irq to route. Must be already locked.
 190  *
 191  * Based on the current state of the interrupt (enabled, pending,
 192  * active, vcpu and target_vcpu), compute the next vcpu this should be
 193  * given to. Return NULL if this shouldn't be injected at all.
 194  *
 195  * Requires the IRQ lock to be held.
 196  */
 197 static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
 198 {
 199         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
 200
 201         /* If the interrupt is active, it must stay on the current vcpu */
 202         if (irq->active)
 203                 return irq->vcpu ? : irq->target_vcpu;
 204
 205         /*
 206          * If the IRQ is not active but enabled and pending, we should direct
 207          * it to its configured target VCPU.
 208          * If the distributor is disabled, pending interrupts shouldn't be
 209          * forwarded.
 210          */
 211         if (irq->enabled && irq_is_pending(irq)) {
 212                 if (unlikely(irq->target_vcpu &&
 213                              !irq->target_vcpu->kvm->arch.vgic.enabled))
 214                         return NULL;
 215
 216                 return irq->target_vcpu;
 217         }
 218
 219         /* If neither active nor pending and enabled, then this IRQ should not
 220          * be queued to any VCPU.
 221          */
 222         return NULL;
 223 }
 224
 225 /*
 226  * The order of items in the ap_lists defines how we'll pack things in LRs as
 227  * well, the first items in the list being the first things populated in the
 228  * LRs.
 229  *
 230  * A hard rule is that active interrupts can never be pushed out of the LRs
 231  * (and therefore take priority) since we cannot reliably trap on deactivation
 232  * of IRQs and therefore they have to be present in the LRs.
 233  *
 234  * Otherwise things should be sorted by the priority field and the GIC
 235  * hardware support will take care of preemption of priority groups etc.
 236  *
 237  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 238  * to sort "b" before "a".
 239  */
 240 static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
 241 {
 242         struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
 243         struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
 244         bool penda, pendb;
 245         int ret;
 246
 247         /*
 248          * list_sort may call this function with the same element when
 249          * the list is fairly long.
 250          */
 251         if (unlikely(irqa == irqb))
 252                 return 0;
 253
 254         spin_lock(&irqa->irq_lock);
 255         spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
 256
 257         if (irqa->active || irqb->active) {
 258                 ret = (int)irqb->active - (int)irqa->active;
 259                 goto out;
 260         }
 261
 262         penda = irqa->enabled && irq_is_pending(irqa);
 263         pendb = irqb->enabled && irq_is_pending(irqb);
 264
 265         if (!penda || !pendb) {
 266                 ret = (int)pendb - (int)penda;
 267                 goto out;
 268         }
 269
 270         /* Both pending and enabled, sort by priority */
 271         ret = irqa->priority - irqb->priority;
 272 out:
 273         spin_unlock(&irqb->irq_lock);
 274         spin_unlock(&irqa->irq_lock);
 275         return ret;
 276 }
 277
 278 /* Must be called with the ap_list_lock held */
 279 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
 280 {
 281         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 282
 283         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
 284
 285         list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
 286 }
 287
 288 /*
 289  * Only valid injection if changing level for level-triggered IRQs or for a
 290  * rising edge, and in-kernel connected IRQ lines can only be controlled by
 291  * their owner.
 292  */
 293 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
 294 {
 295         if (irq->owner != owner)
 296                 return false;
 297
 298         switch (irq->config) {
 299         case VGIC_CONFIG_LEVEL:
 300                 return irq->line_level != level;
 301         case VGIC_CONFIG_EDGE:
 302                 return level;
 303         }
 304
 305         return false;
 306 }
 307
/*
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 * Do the queuing if necessary, taking the right locks in the right order.
 *
 * @kvm:   the VM pointer (not referenced by the body of this function)
 * @irq:   the interrupt to queue; its irq_lock must be held on entry
 * @flags: the IRQ flags saved when the caller took irq_lock with
 *         spin_lock_irqsave(); they are restored when the lock is dropped
 *
 * Returns true when the IRQ was queued, false otherwise.
 *
 * Needs to be entered with the IRQ lock already held, but will return
 * with all locks dropped.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
                           unsigned long flags)
{
        struct kvm_vcpu *vcpu;

        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));

retry:
        /* Re-evaluated on every pass, always with irq_lock held. */
        vcpu = vgic_target_oracle(irq);
        if (irq->vcpu || !vcpu) {
                /*
                 * If this IRQ is already on a VCPU's ap_list, then it
                 * cannot be moved or modified and there is no more work for
                 * us to do.
                 *
                 * Otherwise, if the irq is not pending and enabled, it does
                 * not need to be inserted into an ap_list and there is also
                 * no more work for us to do.
                 */
                spin_unlock_irqrestore(&irq->irq_lock, flags);

                /*
                 * We have to kick the VCPU here, because we could be
                 * queueing an edge-triggered interrupt for which we
                 * get no EOI maintenance interrupt. In that case,
                 * while the IRQ is already on the VCPU's AP list, the
                 * VCPU could have EOI'ed the original interrupt and
                 * won't see this one until it exits for some other
                 * reason.
                 */
                if (vcpu) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                return false;
        }

        /*
         * We must unlock the irq lock to take the ap_list_lock where
         * we are going to insert this new pending interrupt.
         */
        spin_unlock_irqrestore(&irq->irq_lock, flags);

        /* someone can do stuff here, which we re-check below */

        /* Lock order: ap_list_lock first, then irq_lock. */
        spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
        spin_lock(&irq->irq_lock);

        /*
         * Did something change behind our backs?
         *
         * There are two cases:
         * 1) The irq lost its pending state or was disabled behind our
         *    backs and/or it was queued to another VCPU's ap_list.
         * 2) Someone changed the affinity on this irq behind our
         *    backs and we are now holding the wrong ap_list_lock.
         *
         * In both cases, drop the locks and retry.
         */

        if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
                spin_unlock(&irq->irq_lock);
                spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

                /* Restore the entry condition: irq_lock held, IRQs off. */
                spin_lock_irqsave(&irq->irq_lock, flags);
                goto retry;
        }

        /*
         * Grab a reference to the irq to reflect the fact that it is
         * now in the ap_list.
         */
        vgic_get_irq_kref(irq);
        list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
        irq->vcpu = vcpu;

        spin_unlock(&irq->irq_lock);
        spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

        /* Notify the target VCPU only after every lock has been dropped. */
        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
        kvm_vcpu_kick(vcpu);

        return true;
}
 400
 401 /**
 402  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 403  * @kvm:     The VM structure pointer
 404  * @cpuid:   The CPU for PPIs
 405  * @intid:   The INTID to inject a new state to.
 406  * @level:   Edge-triggered:  true:  to trigger the interrupt
 407  *                            false: to ignore the call
 408  *           Level-sensitive  true:  raise the input signal
 409  *                            false: lower the input signal
 410  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
 411  *           that the caller is allowed to inject this IRQ.  Userspace
 412  *           injections will have owner == NULL.
 413  *
 414  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 415  * level-sensitive interrupts.  You can think of the level parameter as 1
 416  * being HIGH and 0 being LOW and all devices being active-HIGH.
 417  */
 418 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 419                         bool level, void *owner)
 420 {
 421         struct kvm_vcpu *vcpu;
 422         struct vgic_irq *irq;
 423         unsigned long flags;
 424         int ret;
 425
 426         trace_vgic_update_irq_pending(cpuid, intid, level);
 427
 428         ret = vgic_lazy_init(kvm);
 429         if (ret)
 430                 return ret;
 431
 432         vcpu = kvm_get_vcpu(kvm, cpuid);
 433         if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
 434                 return -EINVAL;
 435
 436         irq = vgic_get_irq(kvm, vcpu, intid);
 437         if (!irq)
 438                 return -EINVAL;
 439
 440         spin_lock_irqsave(&irq->irq_lock, flags);
 441
 442         if (!vgic_validate_injection(irq, level, owner)) {
 443                 /* Nothing to see here, move along... */
 444                 spin_unlock_irqrestore(&irq->irq_lock, flags);
 445                 vgic_put_irq(kvm, irq);
 446                 return 0;
 447         }
 448
 449         if (irq->config == VGIC_CONFIG_LEVEL)
 450                 irq->line_level = level;
 451         else
 452                 irq->pending_latch = true;
 453
 454         vgic_queue_irq_unlock(kvm, irq, flags);
 455         vgic_put_irq(kvm, irq);
 456
 457         return 0;
 458 }
 459
 460 /* @irq->irq_lock must be held */
 461 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 462                             unsigned int host_irq,
 463                             bool (*get_input_level)(int vindid))
 464 {
 465         struct irq_desc *desc;
 466         struct irq_data *data;
 467
 468         /*
 469          * Find the physical IRQ number corresponding to @host_irq
 470          */
 471         desc = irq_to_desc(host_irq);
 472         if (!desc) {
 473                 kvm_err("%s: no interrupt descriptor\n", __func__);
 474                 return -EINVAL;
 475         }
 476         data = irq_desc_get_irq_data(desc);
 477         while (data->parent_data)
 478                 data = data->parent_data;
 479
 480         irq->hw = true;
 481         irq->host_irq = host_irq;
 482         irq->hwintid = data->hwirq;
 483         irq->get_input_level = get_input_level;
 484         return 0;
 485 }
 486
 487 /* @irq->irq_lock must be held */
 488 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 489 {
 490         irq->hw = false;
 491         irq->hwintid = 0;
 492         irq->get_input_level = NULL;
 493 }
 494
 495 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
 496                           u32 vintid, bool (*get_input_level)(int vindid))
 497 {
 498         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 499         unsigned long flags;
 500         int ret;
 501
 502         BUG_ON(!irq);
 503
 504         spin_lock_irqsave(&irq->irq_lock, flags);
 505         ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
 506         spin_unlock_irqrestore(&irq->irq_lock, flags);
 507         vgic_put_irq(vcpu->kvm, irq);
 508
 509         return ret;
 510 }
 511
 512 /**
 513  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 514  * @vcpu: The VCPU pointer
 515  * @vintid: The INTID of the interrupt
 516  *
 517  * Reset the active and pending states of a mapped interrupt.  Kernel
 518  * subsystems injecting mapped interrupts should reset their interrupt lines
 519  * when we are doing a reset of the VM.
 520  */
 521 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
 522 {
 523         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 524         unsigned long flags;
 525
 526         if (!irq->hw)
 527                 goto out;
 528
 529         spin_lock_irqsave(&irq->irq_lock, flags);
 530         irq->active = false;
 531         irq->pending_latch = false;
 532         irq->line_level = false;
 533         spin_unlock_irqrestore(&irq->irq_lock, flags);
 534 out:
 535         vgic_put_irq(vcpu->kvm, irq);
 536 }
 537
 538 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 539 {
 540         struct vgic_irq *irq;
 541         unsigned long flags;
 542
 543         if (!vgic_initialized(vcpu->kvm))
 544                 return -EAGAIN;
 545
 546         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 547         BUG_ON(!irq);
 548
 549         spin_lock_irqsave(&irq->irq_lock, flags);
 550         kvm_vgic_unmap_irq(irq);
 551         spin_unlock_irqrestore(&irq->irq_lock, flags);
 552         vgic_put_irq(vcpu->kvm, irq);
 553
 554         return 0;
 555 }
 556
 557 /**
 558  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 559  *
 560  * @vcpu:   Pointer to the VCPU (used for PPIs)
 561  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
 562  * @owner:  Opaque pointer to the owner
 563  *
 564  * Returns 0 if intid is not already used by another in-kernel device and the
 565  * owner is set, otherwise returns an error code.
 566  */
 567 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 568 {
 569         struct vgic_irq *irq;
 570         unsigned long flags;
 571         int ret = 0;
 572
 573         if (!vgic_initialized(vcpu->kvm))
 574                 return -EAGAIN;
 575
 576         /* SGIs and LPIs cannot be wired up to any device */
 577         if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
 578                 return -EINVAL;
 579
 580         irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
 581         spin_lock_irqsave(&irq->irq_lock, flags);
 582         if (irq->owner && irq->owner != owner)
 583                 ret = -EEXIST;
 584         else
 585                 irq->owner = owner;
 586         spin_unlock_irqrestore(&irq->irq_lock, flags);
 587
 588         return ret;
 589 }
 590
 591 /**
 592  * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 593  *
 594  * @vcpu: The VCPU pointer
 595  *
 596  * Go over the list of "interesting" interrupts, and prune those that we
 597  * won't have to consider in the near future.
 598  */
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq, *tmp;

        /* The plain spin_lock() calls below rely on IRQs being disabled. */
        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

retry:
        spin_lock(&vgic_cpu->ap_list_lock);

        /* The _safe iterator is needed because entries may be deleted. */
        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
                bool target_vcpu_needs_kick = false;

                spin_lock(&irq->irq_lock);

                BUG_ON(vcpu != irq->vcpu);

                target_vcpu = vgic_target_oracle(irq);

                if (!target_vcpu) {
                        /*
                         * We don't need to process this interrupt any
                         * further, move it off the list.
                         */
                        list_del(&irq->ap_list);
                        irq->vcpu = NULL;
                        spin_unlock(&irq->irq_lock);

                        /*
                         * This vgic_put_irq call matches the
                         * vgic_get_irq_kref in vgic_queue_irq_unlock,
                         * where we added the LPI to the ap_list. As
                         * we remove the irq from the list, we also
                         * drop the refcount.
                         */
                        vgic_put_irq(vcpu->kvm, irq);
                        continue;
                }

                if (target_vcpu == vcpu) {
                        /* We're on the right CPU */
                        spin_unlock(&irq->irq_lock);
                        continue;
                }

                /* This interrupt looks like it has to be migrated. */

                /* Drop both locks so they can be retaken in the right order. */
                spin_unlock(&irq->irq_lock);
                spin_unlock(&vgic_cpu->ap_list_lock);

                /*
                 * Ensure locking order by always locking the smallest
                 * ID first.
                 */
                if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
                        vcpuA = vcpu;
                        vcpuB = target_vcpu;
                } else {
                        vcpuA = target_vcpu;
                        vcpuB = vcpu;
                }

                spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
                spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
                                 SINGLE_DEPTH_NESTING);
                spin_lock(&irq->irq_lock);

                /*
                 * If the affinity has been preserved, move the
                 * interrupt around. Otherwise, it means things have
                 * changed while the interrupt was unlocked, and we
                 * need to replay this.
                 *
                 * In all cases, we cannot trust the list not to have
                 * changed, so we restart from the beginning.
                 */
                if (target_vcpu == vgic_target_oracle(irq)) {
                        struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;

                        list_del(&irq->ap_list);
                        irq->vcpu = target_vcpu;
                        list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
                        target_vcpu_needs_kick = true;
                }

                /* Release in the reverse order of acquisition. */
                spin_unlock(&irq->irq_lock);
                spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
                spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);

                /* Kick the new target only once all locks are dropped. */
                if (target_vcpu_needs_kick) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
                        kvm_vcpu_kick(target_vcpu);
                }

                goto retry;
        }

        spin_unlock(&vgic_cpu->ap_list_lock);
}
 699
 700 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
 701 {
 702         if (kvm_vgic_global_state.type == VGIC_V2)
 703                 vgic_v2_fold_lr_state(vcpu);
 704         else
 705                 vgic_v3_fold_lr_state(vcpu);
 706 }
 707
 708 /* Requires the irq_lock to be held. */
 709 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
 710                                     struct vgic_irq *irq, int lr)
 711 {
 712         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
 713
 714         if (kvm_vgic_global_state.type == VGIC_V2)
 715                 vgic_v2_populate_lr(vcpu, irq, lr);
 716         else
 717                 vgic_v3_populate_lr(vcpu, irq, lr);
 718 }
 719
 720 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
 721 {
 722         if (kvm_vgic_global_state.type == VGIC_V2)
 723                 vgic_v2_clear_lr(vcpu, lr);
 724         else
 725                 vgic_v3_clear_lr(vcpu, lr);
 726 }
 727
 728 static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
 729 {
 730         if (kvm_vgic_global_state.type == VGIC_V2)
 731                 vgic_v2_set_underflow(vcpu);
 732         else
 733                 vgic_v3_set_underflow(vcpu);
 734 }
 735
 736 /* Requires the ap_list_lock to be held. */
 737 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
 738                                  bool *multi_sgi)
 739 {
 740         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 741         struct vgic_irq *irq;
 742         int count = 0;
 743
 744         *multi_sgi = false;
 745
 746         DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
 747
 748         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 749                 int w;
 750
 751                 spin_lock(&irq->irq_lock);
 752                 /* GICv2 SGIs can count for more than one... */
 753                 w = vgic_irq_get_lr_count(irq);
 754                 spin_unlock(&irq->irq_lock);
 755
 756                 count += w;
 757                 *multi_sgi |= (w > 1);
 758         }
 759         return count;
 760 }
 761
/*
 * Populate list registers from @vcpu's ap_list, record how many were used
 * in used_lrs and clear the remaining ones.
 *
 * Requires the VCPU's ap_list_lock to be held.
 */
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count;
        bool multi_sgi;
        u8 prio = 0xff;

        DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));

        /*
         * Sorting is only done when the list needs more LRs than are
         * available, or when an entry needing several LRs was found.
         */
        count = compute_ap_list_depth(vcpu, &multi_sgi);
        if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
                vgic_sort_ap_list(vcpu);

        /* From here on, count is the index of the next LR to populate. */
        count = 0;

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                spin_lock(&irq->irq_lock);

                /*
                 * If we have multi-SGIs in the pipeline, we need to
                 * guarantee that they are all seen before any IRQ of
                 * lower priority. In that case, we need to filter out
                 * these interrupts by exiting early. This is easy as
                 * the AP list has been sorted already.
                 */
                if (multi_sgi && irq->priority > prio) {
                        spin_unlock(&irq->irq_lock);
                        break;
                }

                if (likely(vgic_target_oracle(irq) == vcpu)) {
                        vgic_populate_lr(vcpu, irq, count++);

                        /*
                         * Remember the priority of a populated IRQ that
                         * still has a non-zero source; it feeds the
                         * early-exit check above.
                         */
                        if (irq->source)
                                prio = irq->priority;
                }

                spin_unlock(&irq->irq_lock);

                /*
                 * All LRs are in use. If entries remain on the list,
                 * request the underflow notification before stopping.
                 */
                if (count == kvm_vgic_global_state.nr_lr) {
                        if (!list_is_last(&irq->ap_list,
                                          &vgic_cpu->ap_list_head))
                                vgic_set_underflow(vcpu);
                        break;
                }
        }

        vcpu->arch.vgic_cpu.used_lrs = count;

        /* Nuke remaining LRs */
        for ( ; count < kvm_vgic_global_state.nr_lr; count++)
                vgic_clear_lr(vcpu, count);
}
 817
 818 static inline bool can_access_vgic_from_kernel(void)
 819 {
 820         /*
 821          * GICv2 can always be accessed from the kernel because it is
 822          * memory-mapped, and VHE systems can access GICv3 EL2 system
 823          * registers.
 824          */
 825         return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
 826 }
 827
 828 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
 829 {
 830         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 831                 vgic_v2_save_state(vcpu);
 832         else
 833                 __vgic_v3_save_state(vcpu);
 834 }
 835
 836 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
 837 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 838 {
 839         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 840
 841         WARN_ON(vgic_v4_sync_hwstate(vcpu));
 842
 843         /* An empty ap_list_head implies used_lrs == 0 */
 844         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 845                 return;
 846
 847         if (can_access_vgic_from_kernel())
 848                 vgic_save_state(vcpu);
 849
 850         if (vgic_cpu->used_lrs)
 851                 vgic_fold_lr_state(vcpu);
 852         vgic_prune_ap_list(vcpu);
 853 }
 854
 855 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
 856 {
 857         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 858                 vgic_v2_restore_state(vcpu);
 859         else
 860                 __vgic_v3_restore_state(vcpu);
 861 }
 862
 863 /* Flush our emulation state into the GIC hardware before entering the guest. */
 864 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 865 {
 866         WARN_ON(vgic_v4_flush_hwstate(vcpu));
 867
 868         /*
 869          * If there are no virtual interrupts active or pending for this
 870          * VCPU, then there is no work to do and we can bail out without
 871          * taking any lock.  There is a potential race with someone injecting
 872          * interrupts to the VCPU, but it is a benign race as the VCPU will
 873          * either observe the new interrupt before or after doing this check,
 874          * and introducing additional synchronization mechanism doesn't change
 875          * this.
 876          */
 877         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 878                 return;
 879
 880         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 881
 882         spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
 883         vgic_flush_lr_state(vcpu);
 884         spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
 885
 886         if (can_access_vgic_from_kernel())
 887                 vgic_restore_state(vcpu);
 888 }
 889
 890 void kvm_vgic_load(struct kvm_vcpu *vcpu)
 891 {
 892         if (unlikely(!vgic_initialized(vcpu->kvm)))
 893                 return;
 894
 895         if (kvm_vgic_global_state.type == VGIC_V2)
 896                 vgic_v2_load(vcpu);
 897         else
 898                 vgic_v3_load(vcpu);
 899 }
 900
 901 void kvm_vgic_put(struct kvm_vcpu *vcpu)
 902 {
 903         if (unlikely(!vgic_initialized(vcpu->kvm)))
 904                 return;
 905
 906         if (kvm_vgic_global_state.type == VGIC_V2)
 907                 vgic_v2_put(vcpu);
 908         else
 909                 vgic_v3_put(vcpu);
 910 }
 911
 912 void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
 913 {
 914         if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 915                 return;
 916
 917         if (kvm_vgic_global_state.type == VGIC_V2)
 918                 vgic_v2_vmcr_sync(vcpu);
 919         else
 920                 vgic_v3_vmcr_sync(vcpu);
 921 }
 922
 923 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 924 {
 925         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 926         struct vgic_irq *irq;
 927         bool pending = false;
 928         unsigned long flags;
 929
 930         if (!vcpu->kvm->arch.vgic.enabled)
 931                 return false;
 932
 933         if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
 934                 return true;
 935
 936         spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
 937
 938         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 939                 spin_lock(&irq->irq_lock);
 940                 pending = irq_is_pending(irq) && irq->enabled;
 941                 spin_unlock(&irq->irq_lock);
 942
 943                 if (pending)
 944                         break;
 945         }
 946
 947         spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
 948
 949         return pending;
 950 }
 951
 952 void vgic_kick_vcpus(struct kvm *kvm)
 953 {
 954         struct kvm_vcpu *vcpu;
 955         int c;
 956
 957         /*
 958          * We've injected an interrupt, time to find out who deserves
 959          * a good kick...
 960          */
 961         kvm_for_each_vcpu(c, vcpu, kvm) {
 962                 if (kvm_vgic_vcpu_pending_irq(vcpu)) {
 963                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 964                         kvm_vcpu_kick(vcpu);
 965                 }
 966         }
 967 }
 968
 969 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 970 {
 971         struct vgic_irq *irq;
 972         bool map_is_active;
 973         unsigned long flags;
 974
 975         if (!vgic_initialized(vcpu->kvm))
 976                 return false;
 977
 978         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 979         spin_lock_irqsave(&irq->irq_lock, flags);
 980         map_is_active = irq->hw && irq->active;
 981         spin_unlock_irqrestore(&irq->irq_lock, flags);
 982         vgic_put_irq(vcpu->kvm, irq);
 983
 984         return map_is_active;
 985 }
 986