arch/x86/xen/smp.c

   1 /*
   2  * Xen SMP support
   3  *
   4  * This file implements the Xen versions of smp_ops.  SMP under Xen is
   5  * very straightforward.  Bringing a CPU up is simply a matter of
   6  * loading its initial context and setting it running.
   7  *
   8  * IPIs are handled through the Xen event mechanism.
   9  *
  10  * Because virtual CPUs can be scheduled onto any real CPU, there's no
  11  * useful topology information for the kernel to make use of.  As a
  12  * result, all CPUs are treated as if they're single-core and
  13  * single-threaded.
  14  */
  15 #include <linux/sched.h>
  16 #include <linux/err.h>
  17 #include <linux/slab.h>
  18 #include <linux/smp.h>
  19 #include <linux/irq_work.h>
  20 #include <linux/tick.h>
  21
  22 #include <asm/paravirt.h>
  23 #include <asm/desc.h>
  24 #include <asm/pgtable.h>
  25 #include <asm/cpu.h>
  26
  27 #include <xen/interface/xen.h>
  28 #include <xen/interface/vcpu.h>
  29 #include <xen/interface/xenpmu.h>
  30
  31 #include <asm/spec-ctrl.h>
  32 #include <asm/xen/interface.h>
  33 #include <asm/xen/hypercall.h>
  34
  35 #include <xen/xen.h>
  36 #include <xen/page.h>
  37 #include <xen/events.h>
  38
  39 #include <xen/hvc-console.h>
  40 #include "xen-ops.h"
  41 #include "mmu.h"
  42 #include "smp.h"
  43 #include "pmu.h"
  44
  45 cpumask_var_t xen_cpu_initialized_map;
  46
  47 struct xen_common_irq {
  48         int irq;
  49         char *name;
  50 };
  51 static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 };
  52 static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
  53 static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
  54 static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
  55 static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
  56 static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
  57
  58 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
  59 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
  60 static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);
  61
  62 /*
  63  * Reschedule call back.
  64  */
  65 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
  66 {
  67         inc_irq_stat(irq_resched_count);
  68         scheduler_ipi();
  69
  70         return IRQ_HANDLED;
  71 }
  72
  73 static void cpu_bringup(void)
  74 {
  75         int cpu;
  76
  77         cpu_init();
  78         touch_softlockup_watchdog();
  79         preempt_disable();
  80
  81         /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
  82         if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
  83                 xen_enable_sysenter();
  84                 xen_enable_syscall();
  85         }
  86         cpu = smp_processor_id();
  87         smp_store_cpu_info(cpu);
  88         cpu_data(cpu).x86_max_cores = 1;
  89         set_cpu_sibling_map(cpu);
  90
  91         speculative_store_bypass_ht_init();
  92
  93         xen_setup_cpu_clockevents();
  94
  95         notify_cpu_starting(cpu);
  96
  97         set_cpu_online(cpu, true);
  98
  99         cpu_set_state_online(cpu);  /* Implies full memory barrier. */
 100
 101         /* We can take interrupts now: we're officially "up". */
 102         local_irq_enable();
 103 }
 104
 105 /*
 106  * Note: cpu parameter is only relevant for PVH. The reason for passing it
 107  * is we can't do smp_processor_id until the percpu segments are loaded, for
 108  * which we need the cpu number! So we pass it in rdi as first parameter.
 109  */
 110 asmlinkage __visible void cpu_bringup_and_idle(int cpu)
 111 {
 112 #ifdef CONFIG_XEN_PVH
 113         if (xen_feature(XENFEAT_auto_translated_physmap) &&
 114             xen_feature(XENFEAT_supervisor_mode_kernel))
 115                 xen_pvh_secondary_vcpu_init(cpu);
 116 #endif
 117         cpu_bringup();
 118         cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 119         prevent_tail_call_optimization();
 120 }
 121
 122 void xen_smp_intr_free(unsigned int cpu)
 123 {
 124         if (per_cpu(xen_resched_irq, cpu).irq >= 0) {
 125                 unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL);
 126                 per_cpu(xen_resched_irq, cpu).irq = -1;
 127                 kfree(per_cpu(xen_resched_irq, cpu).name);
 128                 per_cpu(xen_resched_irq, cpu).name = NULL;
 129         }
 130         if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) {
 131                 unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL);
 132                 per_cpu(xen_callfunc_irq, cpu).irq = -1;
 133                 kfree(per_cpu(xen_callfunc_irq, cpu).name);
 134                 per_cpu(xen_callfunc_irq, cpu).name = NULL;
 135         }
 136         if (per_cpu(xen_debug_irq, cpu).irq >= 0) {
 137                 unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL);
 138                 per_cpu(xen_debug_irq, cpu).irq = -1;
 139                 kfree(per_cpu(xen_debug_irq, cpu).name);
 140                 per_cpu(xen_debug_irq, cpu).name = NULL;
 141         }
 142         if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) {
 143                 unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq,
 144                                        NULL);
 145                 per_cpu(xen_callfuncsingle_irq, cpu).irq = -1;
 146                 kfree(per_cpu(xen_callfuncsingle_irq, cpu).name);
 147                 per_cpu(xen_callfuncsingle_irq, cpu).name = NULL;
 148         }
 149         if (xen_hvm_domain())
 150                 return;
 151
 152         if (per_cpu(xen_irq_work, cpu).irq >= 0) {
 153                 unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
 154                 per_cpu(xen_irq_work, cpu).irq = -1;
 155                 kfree(per_cpu(xen_irq_work, cpu).name);
 156                 per_cpu(xen_irq_work, cpu).name = NULL;
 157         }
 158
 159         if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
 160                 unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
 161                 per_cpu(xen_pmu_irq, cpu).irq = -1;
 162                 kfree(per_cpu(xen_pmu_irq, cpu).name);
 163                 per_cpu(xen_pmu_irq, cpu).name = NULL;
 164         }
 165 };
 166 int xen_smp_intr_init(unsigned int cpu)
 167 {
 168         int rc;
 169         char *resched_name, *callfunc_name, *debug_name, *pmu_name;
 170
 171         resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
 172         rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
 173                                     cpu,
 174                                     xen_reschedule_interrupt,
 175                                     IRQF_PERCPU|IRQF_NOBALANCING,
 176                                     resched_name,
 177                                     NULL);
 178         if (rc < 0)
 179                 goto fail;
 180         per_cpu(xen_resched_irq, cpu).irq = rc;
 181         per_cpu(xen_resched_irq, cpu).name = resched_name;
 182
 183         callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
 184         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
 185                                     cpu,
 186                                     xen_call_function_interrupt,
 187                                     IRQF_PERCPU|IRQF_NOBALANCING,
 188                                     callfunc_name,
 189                                     NULL);
 190         if (rc < 0)
 191                 goto fail;
 192         per_cpu(xen_callfunc_irq, cpu).irq = rc;
 193         per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
 194
 195         debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
 196         rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
 197                                      IRQF_PERCPU | IRQF_NOBALANCING,
 198                                      debug_name, NULL);
 199         if (rc < 0)
 200                 goto fail;
 201         per_cpu(xen_debug_irq, cpu).irq = rc;
 202         per_cpu(xen_debug_irq, cpu).name = debug_name;
 203
 204         callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
 205         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
 206                                     cpu,
 207                                     xen_call_function_single_interrupt,
 208                                     IRQF_PERCPU|IRQF_NOBALANCING,
 209                                     callfunc_name,
 210                                     NULL);
 211         if (rc < 0)
 212                 goto fail;
 213         per_cpu(xen_callfuncsingle_irq, cpu).irq = rc;
 214         per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
 215
 216         /*
 217          * The IRQ worker on PVHVM goes through the native path and uses the
 218          * IPI mechanism.
 219          */
 220         if (xen_hvm_domain())
 221                 return 0;
 222
 223         callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
 224         rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
 225                                     cpu,
 226                                     xen_irq_work_interrupt,
 227                                     IRQF_PERCPU|IRQF_NOBALANCING,
 228                                     callfunc_name,
 229                                     NULL);
 230         if (rc < 0)
 231                 goto fail;
 232         per_cpu(xen_irq_work, cpu).irq = rc;
 233         per_cpu(xen_irq_work, cpu).name = callfunc_name;
 234
 235         if (is_xen_pmu(cpu)) {
 236                 pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
 237                 rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
 238                                              xen_pmu_irq_handler,
 239                                              IRQF_PERCPU|IRQF_NOBALANCING,
 240                                              pmu_name, NULL);
 241                 if (rc < 0)
 242                         goto fail;
 243                 per_cpu(xen_pmu_irq, cpu).irq = rc;
 244                 per_cpu(xen_pmu_irq, cpu).name = pmu_name;
 245         }
 246
 247         return 0;
 248
 249  fail:
 250         xen_smp_intr_free(cpu);
 251         return rc;
 252 }
 253
 254 static void __init xen_fill_possible_map(void)
 255 {
 256         int i, rc;
 257
 258         if (xen_initial_domain())
 259                 return;
 260
 261         for (i = 0; i < nr_cpu_ids; i++) {
 262                 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
 263                 if (rc >= 0) {
 264                         num_processors++;
 265                         set_cpu_possible(i, true);
 266                 }
 267         }
 268 }
 269
 270 static void __init xen_filter_cpu_maps(void)
 271 {
 272         int i, rc;
 273         unsigned int subtract = 0;
 274
 275         if (!xen_initial_domain())
 276                 return;
 277
 278         num_processors = 0;
 279         disabled_cpus = 0;
 280         for (i = 0; i < nr_cpu_ids; i++) {
 281                 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
 282                 if (rc >= 0) {
 283                         num_processors++;
 284                         set_cpu_possible(i, true);
 285                 } else {
 286                         set_cpu_possible(i, false);
 287                         set_cpu_present(i, false);
 288                         subtract++;
 289                 }
 290         }
 291 #ifdef CONFIG_HOTPLUG_CPU
 292         /* This is akin to using 'nr_cpus' on the Linux command line.
 293          * Which is OK as when we use 'dom0_max_vcpus=X' we can only
 294          * have up to X, while nr_cpu_ids is greater than X. This
 295          * normally is not a problem, except when CPU hotplugging
 296          * is involved and then there might be more than X CPUs
 297          * in the guest - which will not work as there is no
 298          * hypercall to expand the max number of VCPUs an already
 299          * running guest has. So cap it up to X. */
 300         if (subtract)
 301                 nr_cpu_ids = nr_cpu_ids - subtract;
 302 #endif
 303
 304 }
 305
 306 static void __init xen_pv_smp_prepare_boot_cpu(void)
 307 {
 308         BUG_ON(smp_processor_id() != 0);
 309         native_smp_prepare_boot_cpu();
 310
 311         if (!xen_feature(XENFEAT_writable_page_tables))
 312                 /* We've switched to the "real" per-cpu gdt, so make
 313                  * sure the old memory can be recycled. */
 314                 make_lowmem_page_readwrite(xen_initial_gdt);
 315
 316 #ifdef CONFIG_X86_32
 317         /*
 318          * Xen starts us with XEN_FLAT_RING1_DS, but linux code
 319          * expects __USER_DS
 320          */
 321         loadsegment(ds, __USER_DS);
 322         loadsegment(es, __USER_DS);
 323 #endif
 324
 325         xen_filter_cpu_maps();
 326         xen_setup_vcpu_info_placement();
 327
 328         /*
 329          * The alternative logic (which patches the unlock/lock) runs before
 330          * the smp bootup up code is activated. Hence we need to set this up
 331          * the core kernel is being patched. Otherwise we will have only
 332          * modules patched but not core code.
 333          */
 334         xen_init_spinlocks();
 335 }
 336
 337 static void __init xen_hvm_smp_prepare_boot_cpu(void)
 338 {
 339         BUG_ON(smp_processor_id() != 0);
 340         native_smp_prepare_boot_cpu();
 341
 342         /*
 343          * Setup vcpu_info for boot CPU.
 344          */
 345         xen_vcpu_setup(0);
 346
 347         /*
 348          * The alternative logic (which patches the unlock/lock) runs before
 349          * the smp bootup up code is activated. Hence we need to set this up
 350          * the core kernel is being patched. Otherwise we will have only
 351          * modules patched but not core code.
 352          */
 353         xen_init_spinlocks();
 354 }
 355
 356 static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 357 {
 358         unsigned cpu;
 359         unsigned int i;
 360
 361         if (skip_ioapic_setup) {
 362                 char *m = (max_cpus == 0) ?
 363                         "The nosmp parameter is incompatible with Xen; " \
 364                         "use Xen dom0_max_vcpus=1 parameter" :
 365                         "The noapic parameter is incompatible with Xen";
 366
 367                 xen_raw_printk(m);
 368                 panic(m);
 369         }
 370         xen_init_lock_cpu(0);
 371
 372         smp_store_boot_cpu_info();
 373         cpu_data(0).x86_max_cores = 1;
 374
 375         for_each_possible_cpu(i) {
 376                 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
 377                 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
 378                 zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
 379         }
 380         set_cpu_sibling_map(0);
 381
 382         speculative_store_bypass_ht_init();
 383
 384         xen_pmu_init(0);
 385
 386         if (xen_smp_intr_init(0))
 387                 BUG();
 388
 389         if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
 390                 panic("could not allocate xen_cpu_initialized_map\n");
 391
 392         cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
 393
 394         /* Restrict the possible_map according to max_cpus. */
 395         while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
 396                 for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
 397                         continue;
 398                 set_cpu_possible(cpu, false);
 399         }
 400
 401         for_each_possible_cpu(cpu)
 402                 set_cpu_present(cpu, true);
 403 }
 404
 405 static int
 406 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 407 {
 408         struct vcpu_guest_context *ctxt;
 409         struct desc_struct *gdt;
 410         unsigned long gdt_mfn;
 411
 412         /* used to tell cpu_init() that it can proceed with initialization */
 413         cpumask_set_cpu(cpu, cpu_callout_mask);
 414         if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
 415                 return 0;
 416
 417         ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 418         if (ctxt == NULL)
 419                 return -ENOMEM;
 420
 421         gdt = get_cpu_gdt_table(cpu);
 422
 423 #ifdef CONFIG_X86_32
 424         /* Note: PVH is not yet supported on x86_32. */
 425         ctxt->user_regs.fs = __KERNEL_PERCPU;
 426         ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
 427 #endif
 428         memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 429
 430         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 431                 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 432                 ctxt->flags = VGCF_IN_KERNEL;
 433                 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 434                 ctxt->user_regs.ds = __USER_DS;
 435                 ctxt->user_regs.es = __USER_DS;
 436                 ctxt->user_regs.ss = __KERNEL_DS;
 437
 438                 xen_copy_trap_info(ctxt->trap_ctxt);
 439
 440                 ctxt->ldt_ents = 0;
 441
 442                 BUG_ON((unsigned long)gdt & ~PAGE_MASK);
 443
 444                 gdt_mfn = arbitrary_virt_to_mfn(gdt);
 445                 make_lowmem_page_readonly(gdt);
 446                 make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
 447
 448                 ctxt->gdt_frames[0] = gdt_mfn;
 449                 ctxt->gdt_ents      = GDT_ENTRIES;
 450
 451                 ctxt->kernel_ss = __KERNEL_DS;
 452                 ctxt->kernel_sp = idle->thread.sp0;
 453
 454 #ifdef CONFIG_X86_32
 455                 ctxt->event_callback_cs     = __KERNEL_CS;
 456                 ctxt->failsafe_callback_cs  = __KERNEL_CS;
 457 #else
 458                 ctxt->gs_base_kernel = per_cpu_offset(cpu);
 459 #endif
 460                 ctxt->event_callback_eip    =
 461                                         (unsigned long)xen_hypervisor_callback;
 462                 ctxt->failsafe_callback_eip =
 463                                         (unsigned long)xen_failsafe_callback;
 464                 ctxt->user_regs.cs = __KERNEL_CS;
 465                 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
 466         }
 467 #ifdef CONFIG_XEN_PVH
 468         else {
 469                 /*
 470                  * The vcpu comes on kernel page tables which have the NX pte
 471                  * bit set. This means before DS/SS is touched, NX in
 472                  * EFER must be set. Hence the following assembly glue code.
 473                  */
 474                 ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
 475                 ctxt->user_regs.rdi = cpu;
 476                 ctxt->user_regs.rsi = true;  /* entry == true */
 477         }
 478 #endif
 479         ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 480         ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
 481         if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
 482                 BUG();
 483
 484         kfree(ctxt);
 485         return 0;
 486 }
 487
 488 static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 489 {
 490         int rc;
 491
 492         common_cpu_up(cpu, idle);
 493
 494         xen_setup_runstate_info(cpu);
 495
 496         /*
 497          * PV VCPUs are always successfully taken down (see 'while' loop
 498          * in xen_cpu_die()), so -EBUSY is an error.
 499          */
 500         rc = cpu_check_up_prepare(cpu);
 501         if (rc)
 502                 return rc;
 503
 504         /* make sure interrupts start blocked */
 505         per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
 506
 507         rc = cpu_initialize_context(cpu, idle);
 508         if (rc)
 509                 return rc;
 510
 511         xen_pmu_init(cpu);
 512
 513         rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
 514         BUG_ON(rc);
 515
 516         while (cpu_report_state(cpu) != CPU_ONLINE)
 517                 HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 518
 519         return 0;
 520 }
 521
 522 static void xen_smp_cpus_done(unsigned int max_cpus)
 523 {
 524 }
 525
 526 #ifdef CONFIG_HOTPLUG_CPU
 527 static int xen_cpu_disable(void)
 528 {
 529         unsigned int cpu = smp_processor_id();
 530         if (cpu == 0)
 531                 return -EBUSY;
 532
 533         cpu_disable_common();
 534
 535         load_cr3(swapper_pg_dir);
 536         return 0;
 537 }
 538
 539 static void xen_cpu_die(unsigned int cpu)
 540 {
 541         while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up,
 542                                                      xen_vcpu_nr(cpu), NULL)) {
 543                 __set_current_state(TASK_UNINTERRUPTIBLE);
 544                 schedule_timeout(HZ/10);
 545         }
 546
 547         if (common_cpu_die(cpu) == 0) {
 548                 xen_smp_intr_free(cpu);
 549                 xen_uninit_lock_cpu(cpu);
 550                 xen_teardown_timer(cpu);
 551                 xen_pmu_finish(cpu);
 552         }
 553 }
 554
 555 static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
 556 {
 557         play_dead_common();
 558         HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
 559         cpu_bringup();
 560         /*
 561          * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
 562          * clears certain data that the cpu_idle loop (which called us
 563          * and that we return from) expects. The only way to get that
 564          * data back is to call:
 565          */
 566         tick_nohz_idle_enter();
 567
 568         cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 569 }
 570
 571 #else /* !CONFIG_HOTPLUG_CPU */
 572 static int xen_cpu_disable(void)
 573 {
 574         return -ENOSYS;
 575 }
 576
 577 static void xen_cpu_die(unsigned int cpu)
 578 {
 579         BUG();
 580 }
 581
 582 static void xen_play_dead(void)
 583 {
 584         BUG();
 585 }
 586
 587 #endif
 588 static void stop_self(void *v)
 589 {
 590         int cpu = smp_processor_id();
 591
 592         /* make sure we're not pinning something down */
 593         load_cr3(swapper_pg_dir);
 594         /* should set up a minimal gdt */
 595
 596         set_cpu_online(cpu, false);
 597
 598         HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
 599         BUG();
 600 }
 601
 602 static void xen_stop_other_cpus(int wait)
 603 {
 604         smp_call_function(stop_self, NULL, wait);
 605 }
 606
 607 static void xen_smp_send_reschedule(int cpu)
 608 {
 609         xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 610 }
 611
 612 static void __xen_send_IPI_mask(const struct cpumask *mask,
 613                               int vector)
 614 {
 615         unsigned cpu;
 616
 617         for_each_cpu_and(cpu, mask, cpu_online_mask)
 618                 xen_send_IPI_one(cpu, vector);
 619 }
 620
 621 static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
 622 {
 623         int cpu;
 624
 625         __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 626
 627         /* Make sure other vcpus get a chance to run if they need to. */
 628         for_each_cpu(cpu, mask) {
 629                 if (xen_vcpu_stolen(cpu)) {
 630                         HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
 631                         break;
 632                 }
 633         }
 634 }
 635
 636 static void xen_smp_send_call_function_single_ipi(int cpu)
 637 {
 638         __xen_send_IPI_mask(cpumask_of(cpu),
 639                           XEN_CALL_FUNCTION_SINGLE_VECTOR);
 640 }
 641
 642 static inline int xen_map_vector(int vector)
 643 {
 644         int xen_vector;
 645
 646         switch (vector) {
 647         case RESCHEDULE_VECTOR:
 648                 xen_vector = XEN_RESCHEDULE_VECTOR;
 649                 break;
 650         case CALL_FUNCTION_VECTOR:
 651                 xen_vector = XEN_CALL_FUNCTION_VECTOR;
 652                 break;
 653         case CALL_FUNCTION_SINGLE_VECTOR:
 654                 xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR;
 655                 break;
 656         case IRQ_WORK_VECTOR:
 657                 xen_vector = XEN_IRQ_WORK_VECTOR;
 658                 break;
 659 #ifdef CONFIG_X86_64
 660         case NMI_VECTOR:
 661         case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */
 662                 xen_vector = XEN_NMI_VECTOR;
 663                 break;
 664 #endif
 665         default:
 666                 xen_vector = -1;
 667                 printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
 668                         vector);
 669         }
 670
 671         return xen_vector;
 672 }
 673
 674 void xen_send_IPI_mask(const struct cpumask *mask,
 675                               int vector)
 676 {
 677         int xen_vector = xen_map_vector(vector);
 678
 679         if (xen_vector >= 0)
 680                 __xen_send_IPI_mask(mask, xen_vector);
 681 }
 682
 683 void xen_send_IPI_all(int vector)
 684 {
 685         int xen_vector = xen_map_vector(vector);
 686
 687         if (xen_vector >= 0)
 688                 __xen_send_IPI_mask(cpu_online_mask, xen_vector);
 689 }
 690
 691 void xen_send_IPI_self(int vector)
 692 {
 693         int xen_vector = xen_map_vector(vector);
 694
 695         if (xen_vector >= 0)
 696                 xen_send_IPI_one(smp_processor_id(), xen_vector);
 697 }
 698
 699 void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
 700                                 int vector)
 701 {
 702         unsigned cpu;
 703         unsigned int this_cpu = smp_processor_id();
 704         int xen_vector = xen_map_vector(vector);
 705
 706         if (!(num_online_cpus() > 1) || (xen_vector < 0))
 707                 return;
 708
 709         for_each_cpu_and(cpu, mask, cpu_online_mask) {
 710                 if (this_cpu == cpu)
 711                         continue;
 712
 713                 xen_send_IPI_one(cpu, xen_vector);
 714         }
 715 }
 716
 717 void xen_send_IPI_allbutself(int vector)
 718 {
 719         xen_send_IPI_mask_allbutself(cpu_online_mask, vector);
 720 }
 721
 722 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 723 {
 724         irq_enter();
 725         generic_smp_call_function_interrupt();
 726         inc_irq_stat(irq_call_count);
 727         irq_exit();
 728
 729         return IRQ_HANDLED;
 730 }
 731
 732 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 733 {
 734         irq_enter();
 735         generic_smp_call_function_single_interrupt();
 736         inc_irq_stat(irq_call_count);
 737         irq_exit();
 738
 739         return IRQ_HANDLED;
 740 }
 741
 742 static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
 743 {
 744         irq_enter();
 745         irq_work_run();
 746         inc_irq_stat(apic_irq_work_irqs);
 747         irq_exit();
 748
 749         return IRQ_HANDLED;
 750 }
 751
 752 static const struct smp_ops xen_smp_ops __initconst = {
 753         .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu,
 754         .smp_prepare_cpus = xen_smp_prepare_cpus,
 755         .smp_cpus_done = xen_smp_cpus_done,
 756
 757         .cpu_up = xen_cpu_up,
 758         .cpu_die = xen_cpu_die,
 759         .cpu_disable = xen_cpu_disable,
 760         .play_dead = xen_play_dead,
 761
 762         .stop_other_cpus = xen_stop_other_cpus,
 763         .smp_send_reschedule = xen_smp_send_reschedule,
 764
 765         .send_call_func_ipi = xen_smp_send_call_function_ipi,
 766         .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
 767 };
 768
 769 void __init xen_smp_init(void)
 770 {
 771         smp_ops = xen_smp_ops;
 772         xen_fill_possible_map();
 773 }
 774
 775 static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 776 {
 777         native_smp_prepare_cpus(max_cpus);
 778         WARN_ON(xen_smp_intr_init(0));
 779
 780         xen_init_lock_cpu(0);
 781 }
 782
 783 void __init xen_hvm_smp_init(void)
 784 {
 785         if (!xen_have_vector_callback)
 786                 return;
 787         smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 788         smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
 789         smp_ops.cpu_die = xen_cpu_die;
 790         smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
 791         smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
 792         smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
 793 }