arch/x86/xen/smp.c

   1 /*
   2  * Xen SMP support
   3  *
   4  * This file implements the Xen versions of smp_ops.  SMP under Xen is
   5  * very straightforward.  Bringing a CPU up is simply a matter of
   6  * loading its initial context and setting it running.
   7  *
   8  * IPIs are handled through the Xen event mechanism.
   9  *
  10  * Because virtual CPUs can be scheduled onto any real CPU, there's no
  11  * useful topology information for the kernel to make use of.  As a
  12  * result, all CPUs are treated as if they're single-core and
  13  * single-threaded.
  14  */
  15 #include <linux/sched.h>
  16 #include <linux/err.h>
  17 #include <linux/slab.h>
  18 #include <linux/smp.h>
  19 #include <linux/irq_work.h>
  20 #include <linux/tick.h>
  21
  22 #include <asm/paravirt.h>
  23 #include <asm/desc.h>
  24 #include <asm/pgtable.h>
  25 #include <asm/cpu.h>
  26
  27 #include <xen/interface/xen.h>
  28 #include <xen/interface/vcpu.h>
  29 #include <xen/interface/xenpmu.h>
  30
  31 #include <asm/spec-ctrl.h>
  32 #include <asm/xen/interface.h>
  33 #include <asm/xen/hypercall.h>
  34
  35 #include <xen/xen.h>
  36 #include <xen/page.h>
  37 #include <xen/events.h>
  38
  39 #include <xen/hvc-console.h>
  40 #include "xen-ops.h"
  41 #include "mmu.h"
  42 #include "smp.h"
  43 #include "pmu.h"
  44
  45 cpumask_var_t xen_cpu_initialized_map;
  46
  47 struct xen_common_irq {
  48         int irq;
  49         char *name;
  50 };
  51 static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 };
  52 static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
  53 static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
  54 static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
  55 static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
  56 static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
  57
  58 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
  59 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
  60 static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);
  61
  62 /*
  63  * Reschedule call back.
  64  */
  65 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
  66 {
  67         inc_irq_stat(irq_resched_count);
  68         scheduler_ipi();
  69
  70         return IRQ_HANDLED;
  71 }
  72
  73 static void cpu_bringup(void)
  74 {
  75         int cpu;
  76
  77         cpu_init();
  78         touch_softlockup_watchdog();
  79         preempt_disable();
  80
  81         /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
  82         if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
  83                 xen_enable_sysenter();
  84                 xen_enable_syscall();
  85         }
  86         cpu = smp_processor_id();
  87         smp_store_cpu_info(cpu);
  88         cpu_data(cpu).x86_max_cores = 1;
  89         set_cpu_sibling_map(cpu);
  90
  91         speculative_store_bypass_ht_init();
  92
  93         xen_setup_cpu_clockevents();
  94
  95         notify_cpu_starting(cpu);
  96
  97         set_cpu_online(cpu, true);
  98
  99         cpu_set_state_online(cpu);  /* Implies full memory barrier. */
 100
 101         /* We can take interrupts now: we're officially "up". */
 102         local_irq_enable();
 103 }
 104
 105 /*
 106  * Note: cpu parameter is only relevant for PVH. The reason for passing it
 107  * is we can't do smp_processor_id until the percpu segments are loaded, for
 108  * which we need the cpu number! So we pass it in rdi as first parameter.
 109  */
 110 asmlinkage __visible void cpu_bringup_and_idle(int cpu)
 111 {
 112 #ifdef CONFIG_XEN_PVH
 113         if (xen_feature(XENFEAT_auto_translated_physmap) &&
 114             xen_feature(XENFEAT_supervisor_mode_kernel))
 115                 xen_pvh_secondary_vcpu_init(cpu);
 116 #endif
 117         cpu_bringup();
 118         cpu_startup_entry(CPUHP_ONLINE);
 119         prevent_tail_call_optimization();
 120 }
 121
 122 static void xen_smp_intr_free(unsigned int cpu)
 123 {
 124         if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
 125                 unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL);
 126                 per_cpu(xen_resched_irq, cpu).irq = -1;
 127                 kfree(per_cpu(xen_resched_irq, cpu).name);
 128                 per_cpu(xen_resched_irq, cpu).name = NULL;
 129         }
 130         if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) {
 131                 unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL);
 132                 per_cpu(xen_callfunc_irq, cpu).irq = -1;
 133                 kfree(per_cpu(xen_callfunc_irq, cpu).name);
 134                 per_cpu(xen_callfunc_irq, cpu).name = NULL;
 135         }
 136         if (per_cpu(xen_debug_irq, cpu).irq >= 0) {
 137                 unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL);
 138                 per_cpu(xen_debug_irq, cpu).irq = -1;
 139                 kfree(per_cpu(xen_debug_irq, cpu).name);
 140                 per_cpu(xen_debug_irq, cpu).name = NULL;
 141         }
 142         if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) {
 143                 unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq,
 144                                        NULL);
 145                 per_cpu(xen_callfuncsingle_irq, cpu).irq = -1;
 146                 kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
 147                 per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
 148         }
 149         if (xen_hvm_domain())
 150                 return;
 151
 152         if (per_cpu(xen_irq_work, cpu).irq >= 0) {
 153                 unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
 154                 per_cpu(xen_irq_work, cpu).irq = -1;
 155                 kfree(per_cpu(xen_irq_work, cpu).name);
 156                 per_cpu(xen_irq_work, cpu).name = NULL;
 157         }
 158
 159         if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
 160                 unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
 161                 per_cpu(xen_pmu_irq, cpu).irq = -1;
 162                 kfree(per_cpu(xen_pmu_irq, cpu).name);
 163                 per_cpu(xen_pmu_irq, cpu).name = NULL;
 164         }
 165 };
 166 static int xen_smp_intr_init(unsigned int cpu)
 167 {
 168         int rc;
 169         char *resched_name, *callfunc_name, *debug_name, *pmu_name;
 170
 171         resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
 172         rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
 173                                     cpu,
 174                                     xen_reschedule_interrupt,
 175                                     IRQF_PERCPU|IRQF_NOBALANCING,
 176                                     resched_name,
 177                                     NULL);
 178         if (rc < 0)
 179                 goto fail;
 180         per_cpu(xen_resched_irq, cpu).irq = rc;
 181         per_cpu(xen_resched_irq, cpu).name = resched_name;
 182
 183         callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
 184         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
 185                                     cpu,
 186                                     xen_call_function_interrupt,
 187                                     IRQF_PERCPU|IRQF_NOBALANCING,
 188                                     callfunc_name,
 189                                     NULL);
 190         if (rc < 0)
 191                 goto fail;
 192         per_cpu(xen_callfunc_irq, cpu).irq = rc;
 193         per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
 194
 195         debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
 196         rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
 197                                      IRQF_PERCPU | IRQF_NOBALANCING,
 198                                      debug_name, NULL);
 199         if (rc < 0)
 200                 goto fail;
 201         per_cpu(xen_debug_irq, cpu).irq = rc;
 202         per_cpu(xen_debug_irq, cpu).name = debug_name;
 203
 204         callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
 205         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
 206                                     cpu,
 207                                     xen_call_function_single_interrupt,
 208                                     IRQF_PERCPU|IRQF_NOBALANCING,
 209                                     callfunc_name,
 210                                     NULL);
 211         if (rc < 0)
 212                 goto fail;
 213         per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
 214         per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
 215
 216         /*
 217          * The IRQ worker on PVHVM goes through the native path and uses the
 218          * IPI mechanism.
 219          */
 220         if (xen_hvm_domain())
 221                 return 0;
 222
 223         callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
 224         rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
 225                                     cpu,
 226                                     xen_irq_work_interrupt,
 227                                     IRQF_PERCPU|IRQF_NOBALANCING,
 228                                     callfunc_name,
 229                                     NULL);
 230         if (rc < 0)
 231                 goto fail;
 232         per_cpu(xen_irq_work, cpu).irq = rc;
 233         per_cpu(xen_irq_work, cpu).name = callfunc_name;
 234
 235         if (is_xen_pmu(cpu)) {
 236                 pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
 237                 rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
 238                                              xen_pmu_irq_handler,
 239                                              IRQF_PERCPU|IRQF_NOBALANCING,
 240                                              pmu_name, NULL);
 241                 if (rc < 0)
 242                         goto fail;
 243                 per_cpu(xen_pmu_irq, cpu).irq = rc;
 244                 per_cpu(xen_pmu_irq, cpu).name = pmu_name;
 245         }
 246
 247         return 0;
 248
 249  fail:
 250         xen_smp_intr_free(cpu);
 251         return rc;
 252 }
 253
 254 static void __init xen_fill_possible_map(void)
 255 {
 256         int i, rc;
 257
 258         if (xen_initial_domain())
 259                 return;
 260
 261         for (i = 0; i < nr_cpu_ids; i++) {
 262                 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
 263                 if (rc >= 0) {
 264                         num_processors++;
 265                         set_cpu_possible(i, true);
 266                 }
 267         }
 268 }
 269
 270 static void __init xen_filter_cpu_maps(void)
 271 {
 272         int i, rc;
 273         unsigned int subtract = 0;
 274
 275         if (!xen_initial_domain())
 276                 return;
 277
 278         num_processors = 0;
 279         disabled_cpus = 0;
 280         for (i = 0; i < nr_cpu_ids; i++) {
 281                 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
 282                 if (rc >= 0) {
 283                         num_processors++;
 284                         set_cpu_possible(i, true);
 285                 } else {
 286                         set_cpu_possible(i, false);
 287                         set_cpu_present(i, false);
 288                         subtract++;
 289                 }
 290         }
 291 #ifdef CONFIG_HOTPLUG_CPU
 292         /* This is akin to using 'nr_cpus' on the Linux command line.
 293          * Which is OK as when we use 'dom0_max_vcpus=X' we can only
 294          * have up to X, while nr_cpu_ids is greater than X. This
 295          * normally is not a problem, except when CPU hotplugging
 296          * is involved and then there might be more than X CPUs
 297          * in the guest - which will not work as there is no
 298          * hypercall to expand the max number of VCPUs an already
 299          * running guest has. So cap it up to X. */
 300         if (subtract)
 301                 nr_cpu_ids = nr_cpu_ids - subtract;
 302 #endif
 303
 304 }
 305
 306 static void __init xen_smp_prepare_boot_cpu(void)
 307 {
 308         BUG_ON(smp_processor_id() != 0);
 309         native_smp_prepare_boot_cpu();
 310
 311         if (xen_pv_domain()) {
 312                 if (!xen_feature(XENFEAT_writable_page_tables))
 313                         /* We've switched to the "real" per-cpu gdt, so make
 314                          * sure the old memory can be recycled. */
 315                         make_lowmem_page_readwrite(xen_initial_gdt);
 316
 317 #ifdef CONFIG_X86_32
 318                 /*
 319                  * Xen starts us with XEN_FLAT_RING1_DS, but linux code
 320                  * expects __USER_DS
 321                  */
 322                 loadsegment(ds, __USER_DS);
 323                 loadsegment(es, __USER_DS);
 324 #endif
 325
 326                 xen_filter_cpu_maps();
 327                 xen_setup_vcpu_info_placement();
 328         }
 329         /*
 330          * The alternative logic (which patches the unlock/lock) runs before
 331          * the smp bootup up code is activated. Hence we need to set this up
 332          * the core kernel is being patched. Otherwise we will have only
 333          * modules patched but not core code.
 334          */
 335         xen_init_spinlocks();
 336 }
 337
 338 static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 339 {
 340         unsigned cpu;
 341         unsigned int i;
 342
 343         if (skip_ioapic_setup) {
 344                 char *m = (max_cpus == 0) ?
 345                         "The nosmp parameter is incompatible with Xen; " \
 346                         "use Xen dom0_max_vcpus=1 parameter" :
 347                         "The noapic parameter is incompatible with Xen";
 348
 349                 xen_raw_printk(m);
 350                 panic(m);
 351         }
 352         xen_init_lock_cpu(0);
 353
 354         smp_store_boot_cpu_info();
 355         cpu_data(0).x86_max_cores = 1;
 356
 357         for_each_possible_cpu(i) {
 358                 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
 359                 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
 360                 zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
 361         }
 362         set_cpu_sibling_map(0);
 363
 364         speculative_store_bypass_ht_init();
 365
 366         xen_pmu_init(0);
 367
 368         if (xen_smp_intr_init(0))
 369                 BUG();
 370
 371         if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
 372                 panic("could not allocate xen_cpu_initialized_map\n");
 373
 374         cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
 375
 376         /* Restrict the possible_map according to max_cpus. */
 377         while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
 378                 for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
 379                         continue;
 380                 set_cpu_possible(cpu, false);
 381         }
 382
 383         for_each_possible_cpu(cpu)
 384                 set_cpu_present(cpu, true);
 385 }
 386
 387 static int
 388 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 389 {
 390         struct vcpu_guest_context *ctxt;
 391         struct desc_struct *gdt;
 392         unsigned long gdt_mfn;
 393
 394         /* used to tell cpu_init() that it can proceed with initialization */
 395         cpumask_set_cpu(cpu, cpu_callout_mask);
 396         if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
 397                 return 0;
 398
 399         ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 400         if (ctxt == NULL)
 401                 return -ENOMEM;
 402
 403         gdt = get_cpu_gdt_table(cpu);
 404
 405 #ifdef CONFIG_X86_32
 406         /* Note: PVH is not yet supported on x86_32. */
 407         ctxt->user_regs.fs = __KERNEL_PERCPU;
 408         ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
 409 #endif
 410         memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 411
 412         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 413                 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 414                 ctxt->flags = VGCF_IN_KERNEL;
 415                 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 416                 ctxt->user_regs.ds = __USER_DS;
 417                 ctxt->user_regs.es = __USER_DS;
 418                 ctxt->user_regs.ss = __KERNEL_DS;
 419
 420                 xen_copy_trap_info(ctxt->trap_ctxt);
 421
 422                 ctxt->ldt_ents = 0;
 423
 424                 BUG_ON((unsigned long)gdt & ~PAGE_MASK);
 425
 426                 gdt_mfn = arbitrary_virt_to_mfn(gdt);
 427                 make_lowmem_page_readonly(gdt);
 428                 make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
 429
 430                 ctxt->gdt_frames[0] = gdt_mfn;
 431                 ctxt->gdt_ents      = GDT_ENTRIES;
 432
 433                 ctxt->kernel_ss = __KERNEL_DS;
 434                 ctxt->kernel_sp = idle->thread.sp0;
 435
 436 #ifdef CONFIG_X86_32
 437                 ctxt->event_callback_cs     = __KERNEL_CS;
 438                 ctxt->failsafe_callback_cs  = __KERNEL_CS;
 439 #else
 440                 ctxt->gs_base_kernel = per_cpu_offset(cpu);
 441 #endif
 442                 ctxt->event_callback_eip    =
 443                                         (unsigned long)xen_hypervisor_callback;
 444                 ctxt->failsafe_callback_eip =
 445                                         (unsigned long)xen_failsafe_callback;
 446                 ctxt->user_regs.cs = __KERNEL_CS;
 447                 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
 448         }
 449 #ifdef CONFIG_XEN_PVH
 450         else {
 451                 /*
 452                  * The vcpu comes on kernel page tables which have the NX pte
 453                  * bit set. This means before DS/SS is touched, NX in
 454                  * EFER must be set. Hence the following assembly glue code.
 455                  */
 456                 ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
 457                 ctxt->user_regs.rdi = cpu;
 458                 ctxt->user_regs.rsi = true;  /* entry == true */
 459         }
 460 #endif
 461         ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 462         ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
 463         if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
 464                 BUG();
 465
 466         kfree(ctxt);
 467         return 0;
 468 }
 469
 470 static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 471 {
 472         int rc;
 473
 474         common_cpu_up(cpu, idle);
 475
 476         xen_setup_runstate_info(cpu);
 477         xen_setup_timer(cpu);
 478         xen_init_lock_cpu(cpu);
 479
 480         /*
 481          * PV VCPUs are always successfully taken down (see 'while' loop
 482          * in xen_cpu_die()), so -EBUSY is an error.
 483          */
 484         rc = cpu_check_up_prepare(cpu);
 485         if (rc)
 486                 return rc;
 487
 488         /* make sure interrupts start blocked */
 489         per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
 490
 491         rc = cpu_initialize_context(cpu, idle);
 492         if (rc)
 493                 return rc;
 494
 495         xen_pmu_init(cpu);
 496
 497         rc = xen_smp_intr_init(cpu);
 498         if (rc)
 499                 return rc;
 500
 501         rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
 502         BUG_ON(rc);
 503
 504         while (cpu_report_state(cpu) != CPU_ONLINE)
 505                 HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 506
 507         return 0;
 508 }
 509
 510 static void xen_smp_cpus_done(unsigned int max_cpus)
 511 {
 512 }
 513
 514 #ifdef CONFIG_HOTPLUG_CPU
 515 static int xen_cpu_disable(void)
 516 {
 517         unsigned int cpu = smp_processor_id();
 518         if (cpu == 0)
 519                 return -EBUSY;
 520
 521         cpu_disable_common();
 522
 523         load_cr3(swapper_pg_dir);
 524         return 0;
 525 }
 526
 527 static void xen_cpu_die(unsigned int cpu)
 528 {
 529         while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
 530                 __set_current_state(TASK_UNINTERRUPTIBLE);
 531                 schedule_timeout(HZ/10);
 532         }
 533
 534         if (common_cpu_die(cpu) == 0) {
 535                 xen_smp_intr_free(cpu);
 536                 xen_uninit_lock_cpu(cpu);
 537                 xen_teardown_timer(cpu);
 538                 xen_pmu_finish(cpu);
 539         }
 540 }
 541
 542 static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
 543 {
 544         play_dead_common();
 545         HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
 546         cpu_bringup();
 547         /*
 548          * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
 549          * clears certain data that the cpu_idle loop (which called us
 550          * and that we return from) expects. The only way to get that
 551          * data back is to call:
 552          */
 553         tick_nohz_idle_enter();
 554 }
 555
 556 #else /* !CONFIG_HOTPLUG_CPU */
 557 static int xen_cpu_disable(void)
 558 {
 559         return -ENOSYS;
 560 }
 561
 562 static void xen_cpu_die(unsigned int cpu)
 563 {
 564         BUG();
 565 }
 566
 567 static void xen_play_dead(void)
 568 {
 569         BUG();
 570 }
 571
 572 #endif
 573 static void stop_self(void *v)
 574 {
 575         int cpu = smp_processor_id();
 576
 577         /* make sure we're not pinning something down */
 578         load_cr3(swapper_pg_dir);
 579         /* should set up a minimal gdt */
 580
 581         set_cpu_online(cpu, false);
 582
 583         HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
 584         BUG();
 585 }
 586
 587 static void xen_stop_other_cpus(int wait)
 588 {
 589         smp_call_function(stop_self, NULL, wait);
 590 }
 591
 592 static void xen_smp_send_reschedule(int cpu)
 593 {
 594         xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 595 }
 596
 597 static void __xen_send_IPI_mask(const struct cpumask *mask,
 598                               int vector)
 599 {
 600         unsigned cpu;
 601
 602         for_each_cpu_and(cpu, mask, cpu_online_mask)
 603                 xen_send_IPI_one(cpu, vector);
 604 }
 605
 606 static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 607 {
 608         int cpu;
 609
 610         __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 611
 612         /* Make sure other vcpus get a chance to run if they need to. */
 613         for_each_cpu(cpu, mask) {
 614                 if (xen_vcpu_stolen(cpu)) {
 615                         HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 616                         break;
 617                 }
 618         }
 619 }
 620
 621 static void xen_smp_send_call_function_single_ipi(int cpu)
 622 {
 623         __xen_send_IPI_mask(cpumask_of(cpu),
 624                           XEN_CALL_FUNCTION_SINGLE_VECTOR);
 625 }
 626
 627 static inline int xen_map_vector(int vector)
 628 {
 629         int xen_vector;
 630
 631         switch (vector) {
 632         case RESCHEDULE_VECTOR:
 633                 xen_vector = XEN_RESCHEDULE_VECTOR;
 634                 break;
 635         case CALL_FUNCTION_VECTOR:
 636                 xen_vector = XEN_CALL_FUNCTION_VECTOR;
 637                 break;
 638         case CALL_FUNCTION_SINGLE_VECTOR:
 639                 xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR;
 640                 break;
 641         case IRQ_WORK_VECTOR:
 642                 xen_vector = XEN_IRQ_WORK_VECTOR;
 643                 break;
 644 #ifdef CONFIG_X86_64
 645         case NMI_VECTOR:
 646         case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */
 647                 xen_vector = XEN_NMI_VECTOR;
 648                 break;
 649 #endif
 650         default:
 651                 xen_vector = -1;
 652                 printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
 653                         vector);
 654         }
 655
 656         return xen_vector;
 657 }
 658
 659 void xen_send_IPI_mask(const struct cpumask *mask,
 660                               int vector)
 661 {
 662         int xen_vector = xen_map_vector(vector);
 663
 664         if (xen_vector >= 0)
 665                 __xen_send_IPI_mask(mask, xen_vector);
 666 }
 667
 668 void xen_send_IPI_all(int vector)
 669 {
 670         int xen_vector = xen_map_vector(vector);
 671
 672         if (xen_vector >= 0)
 673                 __xen_send_IPI_mask(cpu_online_mask, xen_vector);
 674 }
 675
 676 void xen_send_IPI_self(int vector)
 677 {
 678         int xen_vector = xen_map_vector(vector);
 679
 680         if (xen_vector >= 0)
 681                 xen_send_IPI_one(smp_processor_id(), xen_vector);
 682 }
 683
 684 void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
 685                                 int vector)
 686 {
 687         unsigned cpu;
 688         unsigned int this_cpu = smp_processor_id();
 689         int xen_vector = xen_map_vector(vector);
 690
 691         if (!(num_online_cpus() > 1) || (xen_vector < 0))
 692                 return;
 693
 694         for_each_cpu_and(cpu, mask, cpu_online_mask) {
 695                 if (this_cpu == cpu)
 696                         continue;
 697
 698                 xen_send_IPI_one(cpu, xen_vector);
 699         }
 700 }
 701
 702 void xen_send_IPI_allbutself(int vector)
 703 {
 704         xen_send_IPI_mask_allbutself(cpu_online_mask, vector);
 705 }
 706
 707 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 708 {
 709         irq_enter();
 710         generic_smp_call_function_interrupt();
 711         inc_irq_stat(irq_call_count);
 712         irq_exit();
 713
 714         return IRQ_HANDLED;
 715 }
 716
 717 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 718 {
 719         irq_enter();
 720         generic_smp_call_function_single_interrupt();
 721         inc_irq_stat(irq_call_count);
 722         irq_exit();
 723
 724         return IRQ_HANDLED;
 725 }
 726
 727 static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
 728 {
 729         irq_enter();
 730         irq_work_run();
 731         inc_irq_stat(apic_irq_work_irqs);
 732         irq_exit();
 733
 734         return IRQ_HANDLED;
 735 }
 736
 737 static const struct smp_ops xen_smp_ops __initconst = {
 738         .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
 739         .smp_prepare_cpus = xen_smp_prepare_cpus,
 740         .smp_cpus_done = xen_smp_cpus_done,
 741
 742         .cpu_up = xen_cpu_up,
 743         .cpu_die = xen_cpu_die,
 744         .cpu_disable = xen_cpu_disable,
 745         .play_dead = xen_play_dead,
 746
 747         .stop_other_cpus = xen_stop_other_cpus,
 748         .smp_send_reschedule = xen_smp_send_reschedule,
 749
 750         .send_call_func_ipi = xen_smp_send_call_function_ipi,
 751         .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
 752 };
 753
 754 void __init xen_smp_init(void)
 755 {
 756         smp_ops = xen_smp_ops;
 757         xen_fill_possible_map();
 758 }
 759
 760 static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 761 {
 762         native_smp_prepare_cpus(max_cpus);
 763         WARN_ON(xen_smp_intr_init(0));
 764
 765         xen_init_lock_cpu(0);
 766 }
 767
 768 static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 769 {
 770         int rc;
 771
 772         /*
 773          * This can happen if CPU was offlined earlier and
 774          * offlining timed out in common_cpu_die().
 775          */
 776         if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
 777                 xen_smp_intr_free(cpu);
 778                 xen_uninit_lock_cpu(cpu);
 779         }
 780
 781         /*
 782          * xen_smp_intr_init() needs to run before native_cpu_up()
 783          * so that IPI vectors are set up on the booting CPU before
 784          * it is marked online in native_cpu_up().
 785         */
 786         rc = xen_smp_intr_init(cpu);
 787         WARN_ON(rc);
 788         if (!rc)
 789                 rc =  native_cpu_up(cpu, tidle);
 790
 791         /*
 792          * We must initialize the slowpath CPU kicker _after_ the native
 793          * path has executed. If we initialized it before none of the
 794          * unlocker IPI kicks would reach the booting CPU as the booting
 795          * CPU had not set itself 'online' in cpu_online_mask. That mask
 796          * is checked when IPIs are sent (on HVM at least).
 797          */
 798         xen_init_lock_cpu(cpu);
 799         return rc;
 800 }
 801
 802 void __init xen_hvm_smp_init(void)
 803 {
 804         if (!xen_have_vector_callback)
 805                 return;
 806         smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 807         smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
 808         smp_ops.cpu_up = xen_hvm_cpu_up;
 809         smp_ops.cpu_die = xen_cpu_die;
 810         smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
 811         smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
 812         smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu;
 813 }