arm64: dts: qcom: sm8550: add TRNG node
[linux-modified.git] / arch / loongarch / kernel / smp.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4  *
5  * Derived from MIPS:
6  * Copyright (C) 2000, 2001 Kanoj Sarcar
7  * Copyright (C) 2000, 2001 Ralf Baechle
8  * Copyright (C) 2000, 2001 Silicon Graphics, Inc.
9  * Copyright (C) 2000, 2001, 2003 Broadcom Corporation
10  */
11 #include <linux/acpi.h>
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/init.h>
15 #include <linux/interrupt.h>
16 #include <linux/profile.h>
17 #include <linux/seq_file.h>
18 #include <linux/smp.h>
19 #include <linux/threads.h>
20 #include <linux/export.h>
21 #include <linux/syscore_ops.h>
22 #include <linux/time.h>
23 #include <linux/tracepoint.h>
24 #include <linux/sched/hotplug.h>
25 #include <linux/sched/task_stack.h>
26
27 #include <asm/cpu.h>
28 #include <asm/idle.h>
29 #include <asm/loongson.h>
30 #include <asm/mmu_context.h>
31 #include <asm/numa.h>
32 #include <asm/processor.h>
33 #include <asm/setup.h>
34 #include <asm/time.h>
35
int __cpu_number_map[NR_CPUS];   /* Map physical to logical */
EXPORT_SYMBOL(__cpu_number_map);

int __cpu_logical_map[NR_CPUS];         /* Map logical to physical */
EXPORT_SYMBOL(__cpu_logical_map);

/* Representing the threads (siblings) of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);

/* Representing the core map of multi-core chips of each logical CPU */
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);

/*
 * Handshake between __cpu_up() (waiter) and start_secondary() (completer):
 * cpu_starting is completed first, cpu_running once the CPU is marked online.
 */
static DECLARE_COMPLETION(cpu_starting);
static DECLARE_COMPLETION(cpu_running);

/*
 * A logical cpu mask containing only one VPE per core to
 * reduce the number of IPIs on large MT systems.
 */
cpumask_t cpu_foreign_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_foreign_map);

/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;

/* representing cpus for which core maps can be computed */
static cpumask_t cpu_core_setup_map;

/* Stack and thread_info for the next secondary, set by loongson_boot_secondary() */
struct secondary_data cpuboot_data;
/* Per-CPU state; this file stores CPU_ONLINE and CPU_DEAD in it */
static DEFINE_PER_CPU(int, cpu_state);
68
/* Indices into ipi_types[] and the per-CPU irq_stat ipi_irqs[] counters */
enum ipi_msg_type {
        IPI_RESCHEDULE,
        IPI_CALL_FUNCTION,
};
73
/* Human-readable name of each IPI type, printed by show_ipi_list() */
static const char *ipi_types[NR_IPI] __tracepoint_string = {
        [IPI_RESCHEDULE] = "Rescheduling interrupts",
        [IPI_CALL_FUNCTION] = "Function call interrupts",
};
78
79 void show_ipi_list(struct seq_file *p, int prec)
80 {
81         unsigned int cpu, i;
82
83         for (i = 0; i < NR_IPI; i++) {
84                 seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : "");
85                 for_each_online_cpu(cpu)
86                         seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]);
87                 seq_printf(p, " LoongArch  %d  %s\n", i + 1, ipi_types[i]);
88         }
89 }
90
91 /* Send mailbox buffer via Mail_Send */
92 static void csr_mail_send(uint64_t data, int cpu, int mailbox)
93 {
94         uint64_t val;
95
96         /* Send high 32 bits */
97         val = IOCSR_MBUF_SEND_BLOCKING;
98         val |= (IOCSR_MBUF_SEND_BOX_HI(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT);
99         val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT);
100         val |= (data & IOCSR_MBUF_SEND_H32_MASK);
101         iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND);
102
103         /* Send low 32 bits */
104         val = IOCSR_MBUF_SEND_BLOCKING;
105         val |= (IOCSR_MBUF_SEND_BOX_LO(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT);
106         val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT);
107         val |= (data << IOCSR_MBUF_SEND_BUF_SHIFT);
108         iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND);
109 };
110
111 static u32 ipi_read_clear(int cpu)
112 {
113         u32 action;
114
115         /* Load the ipi register to figure out what we're supposed to do */
116         action = iocsr_read32(LOONGARCH_IOCSR_IPI_STATUS);
117         /* Clear the ipi register to clear the interrupt */
118         iocsr_write32(action, LOONGARCH_IOCSR_IPI_CLEAR);
119         wbflush();
120
121         return action;
122 }
123
124 static void ipi_write_action(int cpu, u32 action)
125 {
126         unsigned int irq = 0;
127
128         while ((irq = ffs(action))) {
129                 uint32_t val = IOCSR_IPI_SEND_BLOCKING;
130
131                 val |= (irq - 1);
132                 val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
133                 iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
134                 action &= ~BIT(irq - 1);
135         }
136 }
137
/*
 * Send the IPI bits in @action to logical CPU @cpu. The logical id is
 * translated with cpu_logical_map() before the hardware write.
 */
void loongson_send_ipi_single(int cpu, unsigned int action)
{
        ipi_write_action(cpu_logical_map(cpu), (u32)action);
}
142
143 void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
144 {
145         unsigned int i;
146
147         for_each_cpu(i, mask)
148                 ipi_write_action(cpu_logical_map(i), (u32)action);
149 }
150
/*
 * This function sends a 'reschedule' IPI to another CPU.
 * It goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 *
 * @cpu: logical id of the target CPU.
 */
void arch_smp_send_reschedule(int cpu)
{
        loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
161
162 irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
163 {
164         unsigned int action;
165         unsigned int cpu = smp_processor_id();
166
167         action = ipi_read_clear(cpu_logical_map(cpu));
168
169         if (action & SMP_RESCHEDULE) {
170                 scheduler_ipi();
171                 per_cpu(irq_stat, cpu).ipi_irqs[IPI_RESCHEDULE]++;
172         }
173
174         if (action & SMP_CALL_FUNCTION) {
175                 generic_smp_call_function_interrupt();
176                 per_cpu(irq_stat, cpu).ipi_irqs[IPI_CALL_FUNCTION]++;
177         }
178
179         return IRQ_HANDLED;
180 }
181
182 static void __init fdt_smp_setup(void)
183 {
184 #ifdef CONFIG_OF
185         unsigned int cpu, cpuid;
186         struct device_node *node = NULL;
187
188         for_each_of_cpu_node(node) {
189                 if (!of_device_is_available(node))
190                         continue;
191
192                 cpuid = of_get_cpu_hwid(node, 0);
193                 if (cpuid >= nr_cpu_ids)
194                         continue;
195
196                 if (cpuid == loongson_sysconf.boot_cpu_id) {
197                         cpu = 0;
198                         numa_add_cpu(cpu);
199                 } else {
200                         cpu = cpumask_next_zero(-1, cpu_present_mask);
201                 }
202
203                 num_processors++;
204                 set_cpu_possible(cpu, true);
205                 set_cpu_present(cpu, true);
206                 __cpu_number_map[cpuid] = cpu;
207                 __cpu_logical_map[cpu] = cpuid;
208         }
209
210         loongson_sysconf.nr_cpus = num_processors;
211         set_bit(0, &(loongson_sysconf.cores_io_master));
212 #endif
213 }
214
215 void __init loongson_smp_setup(void)
216 {
217         fdt_smp_setup();
218
219         cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
220         cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
221
222         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
223         pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
224 }
225
226 void __init loongson_prepare_cpus(unsigned int max_cpus)
227 {
228         int i = 0;
229
230         parse_acpi_topology();
231
232         for (i = 0; i < loongson_sysconf.nr_cpus; i++) {
233                 set_cpu_present(i, true);
234                 csr_mail_send(0, __cpu_logical_map[i], 0);
235                 cpu_data[i].global_id = __cpu_logical_map[i];
236         }
237
238         per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
239 }
240
241 /*
242  * Setup the PC, SP, and TP of a secondary processor and start it running!
243  */
244 void loongson_boot_secondary(int cpu, struct task_struct *idle)
245 {
246         unsigned long entry;
247
248         pr_info("Booting CPU#%d...\n", cpu);
249
250         entry = __pa_symbol((unsigned long)&smpboot_entry);
251         cpuboot_data.stack = (unsigned long)__KSTK_TOS(idle);
252         cpuboot_data.thread_info = (unsigned long)task_thread_info(idle);
253
254         csr_mail_send(entry, cpu_logical_map(cpu), 0);
255
256         loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
257 }
258
259 /*
260  * SMP init and finish on secondary CPUs
261  */
262 void loongson_init_secondary(void)
263 {
264         unsigned int cpu = smp_processor_id();
265         unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
266                              ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
267
268         change_csr_ecfg(ECFG0_IM, imask);
269
270         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
271
272 #ifdef CONFIG_NUMA
273         numa_add_cpu(cpu);
274 #endif
275         per_cpu(cpu_state, cpu) = CPU_ONLINE;
276         cpu_data[cpu].package =
277                      cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
278         cpu_data[cpu].core = pptt_enabled ? cpu_data[cpu].core :
279                      cpu_logical_map(cpu) % loongson_sysconf.cores_per_package;
280 }
281
/*
 * Last step of secondary bring-up (called from start_secondary()):
 * enable local interrupts, write 0 to mailbox 0 and log completion.
 */
void loongson_smp_finish(void)
{
        local_irq_enable();
        /*
         * NOTE(review): presumably clears the boot entry address so a later
         * arch_cpu_idle_dead() poll of MBUF0 does not see a stale value.
         */
        iocsr_write64(0, LOONGARCH_IOCSR_MBUF0);
        pr_info("CPU#%d finished\n", smp_processor_id());
}
288
289 #ifdef CONFIG_HOTPLUG_CPU
290
/*
 * Take the calling CPU out of service for hotplug.
 *
 * Returns -EBUSY if io_master() is true for this CPU, otherwise 0 after
 * the CPU has been marked offline, its interrupts migrated away and its
 * local TLB flushed.
 */
int loongson_cpu_disable(void)
{
        unsigned long flags;
        unsigned int cpu = smp_processor_id();

        if (io_master(cpu))
                return -EBUSY;

#ifdef CONFIG_NUMA
        numa_remove_cpu(cpu);
#endif
        set_cpu_online(cpu, false);
        /* The online mask changed, so the foreign map must be rebuilt */
        calculate_cpu_foreign_map();
        /* Migrate IRQs and clear the ECFG0_IM bits with local IRQs off */
        local_irq_save(flags);
        irq_migrate_all_off_this_cpu();
        clear_csr_ecfg(ECFG0_IM);
        local_irq_restore(flags);
        local_flush_tlb_all();

        return 0;
}
312
313 void loongson_cpu_die(unsigned int cpu)
314 {
315         while (per_cpu(cpu_state, cpu) != CPU_DEAD)
316                 cpu_relax();
317
318         mb();
319 }
320
321 void __noreturn arch_cpu_idle_dead(void)
322 {
323         register uint64_t addr;
324         register void (*init_fn)(void);
325
326         idle_task_exit();
327         local_irq_enable();
328         set_csr_ecfg(ECFGF_IPI);
329         __this_cpu_write(cpu_state, CPU_DEAD);
330
331         __smp_mb();
332         do {
333                 __asm__ __volatile__("idle 0\n\t");
334                 addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
335         } while (addr == 0);
336
337         init_fn = (void *)TO_CACHE(addr);
338         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
339
340         init_fn();
341         BUG();
342 }
343
344 #endif
345
346 /*
347  * Power management
348  */
349 #ifdef CONFIG_PM
350
/* Syscore suspend hook: nothing to save, always reports success (0). */
static int loongson_ipi_suspend(void)
{
        return 0;
}
355
/* Syscore resume hook: re-enable all IPI bits on the resuming CPU. */
static void loongson_ipi_resume(void)
{
        iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
}
360
/* Suspend/resume callbacks registered by ipi_pm_init() */
static struct syscore_ops loongson_ipi_syscore_ops = {
        .resume         = loongson_ipi_resume,
        .suspend        = loongson_ipi_suspend,
};
365
/*
 * Enable boot cpu ipi before enabling nonboot cpus
 * during syscore_resume.
 *
 * Registers the IPI syscore ops; always returns 0.
 */
static int __init ipi_pm_init(void)
{
        register_syscore_ops(&loongson_ipi_syscore_ops);
        return 0;
}

core_initcall(ipi_pm_init);
377 #endif
378
379 static inline void set_cpu_sibling_map(int cpu)
380 {
381         int i;
382
383         cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
384
385         for_each_cpu(i, &cpu_sibling_setup_map) {
386                 if (cpus_are_siblings(cpu, i)) {
387                         cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
388                         cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
389                 }
390         }
391 }
392
393 static inline void set_cpu_core_map(int cpu)
394 {
395         int i;
396
397         cpumask_set_cpu(cpu, &cpu_core_setup_map);
398
399         for_each_cpu(i, &cpu_core_setup_map) {
400                 if (cpu_data[cpu].package == cpu_data[i].package) {
401                         cpumask_set_cpu(i, &cpu_core_map[cpu]);
402                         cpumask_set_cpu(cpu, &cpu_core_map[i]);
403                 }
404         }
405 }
406
407 /*
408  * Calculate a new cpu_foreign_map mask whenever a
409  * new cpu appears or disappears.
410  */
411 void calculate_cpu_foreign_map(void)
412 {
413         int i, k, core_present;
414         cpumask_t temp_foreign_map;
415
416         /* Re-calculate the mask */
417         cpumask_clear(&temp_foreign_map);
418         for_each_online_cpu(i) {
419                 core_present = 0;
420                 for_each_cpu(k, &temp_foreign_map)
421                         if (cpus_are_siblings(i, k))
422                                 core_present = 1;
423                 if (!core_present)
424                         cpumask_set_cpu(i, &temp_foreign_map);
425         }
426
427         for_each_online_cpu(i)
428                 cpumask_andnot(&cpu_foreign_map[i],
429                                &temp_foreign_map, &cpu_sibling_map[i]);
430 }
431
432 /* Preload SMP state for boot cpu */
433 void smp_prepare_boot_cpu(void)
434 {
435         unsigned int cpu, node, rr_node;
436
437         set_cpu_possible(0, true);
438         set_cpu_online(0, true);
439         set_my_cpu_offset(per_cpu_offset(0));
440
441         rr_node = first_node(node_online_map);
442         for_each_possible_cpu(cpu) {
443                 node = early_cpu_to_node(cpu);
444
445                 /*
446                  * The mapping between present cpus and nodes has been
447                  * built during MADT and SRAT parsing.
448                  *
449                  * If possible cpus = present cpus here, early_cpu_to_node
450                  * will return valid node.
451                  *
452                  * If possible cpus > present cpus here (e.g. some possible
453                  * cpus will be added by cpu-hotplug later), for possible but
454                  * not present cpus, early_cpu_to_node will return NUMA_NO_NODE,
455                  * and we just map them to online nodes in round-robin way.
456                  * Once hotplugged, new correct mapping will be built for them.
457                  */
458                 if (node != NUMA_NO_NODE)
459                         set_cpu_numa_node(cpu, node);
460                 else {
461                         set_cpu_numa_node(cpu, rr_node);
462                         rr_node = next_node_in(rr_node, node_online_map);
463                 }
464         }
465 }
466
/*
 * Called from main before smp_init().
 *
 * Sets up the boot CPU's context and topology maps and prepares the other
 * CPUs via loongson_prepare_cpus(). @max_cpus is only forwarded to it.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
        init_new_context(current, &init_mm);
        current_thread_info()->cpu = 0;
        loongson_prepare_cpus(max_cpus);
        /* Seed the sibling/core/foreign maps with the boot CPU (logical 0) */
        set_cpu_sibling_map(0);
        set_cpu_core_map(0);
        calculate_cpu_foreign_map();
#ifndef CONFIG_HOTPLUG_CPU
        /* Without hotplug, every possible CPU is treated as present */
        init_cpu_present(cpu_possible_mask);
#endif
}
480
481 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
482 {
483         loongson_boot_secondary(cpu, tidle);
484
485         /* Wait for CPU to start and be ready to sync counters */
486         if (!wait_for_completion_timeout(&cpu_starting,
487                                          msecs_to_jiffies(5000))) {
488                 pr_crit("CPU%u: failed to start\n", cpu);
489                 return -EIO;
490         }
491
492         /* Wait for CPU to finish startup & mark itself online before return */
493         wait_for_completion(&cpu_running);
494
495         return 0;
496 }
497
/*
 * First C code run on the secondary CPUs after being started up by
 * the master.
 *
 * Initialises this CPU, signals the two completions __cpu_up() waits on,
 * and ends by calling cpu_startup_entry().
 */
asmlinkage void start_secondary(void)
{
        unsigned int cpu;

        sync_counter();
        cpu = raw_smp_processor_id();
        /* Install this CPU's per-CPU offset before anything uses per-CPU data */
        set_my_cpu_offset(per_cpu_offset(cpu));
        rcutree_report_cpu_starting(cpu);

        cpu_probe();
        constant_clockevent_init();
        loongson_init_secondary();

        /* Topology maps rely on cpu_data[] filled in by the calls above */
        set_cpu_sibling_map(cpu);
        set_cpu_core_map(cpu);

        notify_cpu_starting(cpu);

        /* Notify boot CPU that we're starting */
        complete(&cpu_starting);

        /* The CPU is running, now mark it online */
        set_cpu_online(cpu, true);

        /* Online mask changed: rebuild the foreign map */
        calculate_cpu_foreign_map();

        /*
         * Notify boot CPU that we're up & online and it can safely return
         * from __cpu_up()
         */
        complete(&cpu_running);

        /*
         * irq will be enabled in loongson_smp_finish(), enabling it too
         * early is dangerous.
         */
        WARN_ON_ONCE(!irqs_disabled());
        loongson_smp_finish();

        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
543
/* Intentionally empty on this architecture; @max_cpus is unused. */
void __init smp_cpus_done(unsigned int max_cpus)
{
}
547
548 static void stop_this_cpu(void *dummy)
549 {
550         set_cpu_online(smp_processor_id(), false);
551         calculate_cpu_foreign_map();
552         local_irq_disable();
553         while (true);
554 }
555
/*
 * Ask the other CPUs to run stop_this_cpu(). The last argument (wait) is 0,
 * so this does not wait for them to finish.
 */
void smp_send_stop(void)
{
        smp_call_function(stop_this_cpu, NULL, 0);
}
560
561 #ifdef CONFIG_PROFILING
/* No-op on this architecture: @multiplier is ignored and 0 is returned. */
int setup_profiling_timer(unsigned int multiplier)
{
        return 0;
}
566 #endif
567
/* Cross-call callback: flush the whole TLB of the executing CPU. @info is unused. */
static void flush_tlb_all_ipi(void *info)
{
        local_flush_tlb_all();
}
572
/* Run flush_tlb_all_ipi() via on_each_cpu() with wait=1. */
void flush_tlb_all(void)
{
        on_each_cpu(flush_tlb_all_ipi, NULL, 1);
}
577
/* Cross-call callback: flush the executing CPU's TLB entries for @mm. */
static void flush_tlb_mm_ipi(void *mm)
{
        struct mm_struct *target = mm;

        local_flush_tlb_mm(target);
}
582
583 void flush_tlb_mm(struct mm_struct *mm)
584 {
585         if (atomic_read(&mm->mm_users) == 0)
586                 return;         /* happens as a result of exit_mmap() */
587
588         preempt_disable();
589
590         if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
591                 on_each_cpu_mask(mm_cpumask(mm), flush_tlb_mm_ipi, mm, 1);
592         } else {
593                 unsigned int cpu;
594
595                 for_each_online_cpu(cpu) {
596                         if (cpu != smp_processor_id() && cpu_context(cpu, mm))
597                                 cpu_context(cpu, mm) = 0;
598                 }
599                 local_flush_tlb_mm(mm);
600         }
601
602         preempt_enable();
603 }
604
/* Argument bundle handed to the flush_tlb_*_ipi() cross-call callbacks */
struct flush_tlb_data {
        struct vm_area_struct *vma;     /* target VMA (range and page flushes) */
        unsigned long addr1;            /* range start, or the single page address */
        unsigned long addr2;            /* range end (range flushes only) */
};
610
/* Cross-call callback: flush the range described by @info (a struct flush_tlb_data) locally. */
static void flush_tlb_range_ipi(void *info)
{
        struct flush_tlb_data *fd = info;

        local_flush_tlb_range(fd->vma, fd->addr1, fd->addr2);
}
617
618 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
619 {
620         struct mm_struct *mm = vma->vm_mm;
621
622         preempt_disable();
623         if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
624                 struct flush_tlb_data fd = {
625                         .vma = vma,
626                         .addr1 = start,
627                         .addr2 = end,
628                 };
629
630                 on_each_cpu_mask(mm_cpumask(mm), flush_tlb_range_ipi, &fd, 1);
631         } else {
632                 unsigned int cpu;
633
634                 for_each_online_cpu(cpu) {
635                         if (cpu != smp_processor_id() && cpu_context(cpu, mm))
636                                 cpu_context(cpu, mm) = 0;
637                 }
638                 local_flush_tlb_range(vma, start, end);
639         }
640         preempt_enable();
641 }
642
/* Cross-call callback: flush the kernel range addr1..addr2 from @info locally. */
static void flush_tlb_kernel_range_ipi(void *info)
{
        struct flush_tlb_data *fd = info;

        local_flush_tlb_kernel_range(fd->addr1, fd->addr2);
}
649
/* Flush the kernel range @start..@end via on_each_cpu() with wait=1. */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        /* .vma is left zero-initialised; the kernel-range callback does not read it */
        struct flush_tlb_data fd = {
                .addr1 = start,
                .addr2 = end,
        };

        on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1);
}
659
/* Cross-call callback: flush the single page addr1 of fd->vma locally. */
static void flush_tlb_page_ipi(void *info)
{
        struct flush_tlb_data *fd = info;

        local_flush_tlb_page(fd->vma, fd->addr1);
}
666
667 void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
668 {
669         preempt_disable();
670         if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) {
671                 struct flush_tlb_data fd = {
672                         .vma = vma,
673                         .addr1 = page,
674                 };
675
676                 on_each_cpu_mask(mm_cpumask(vma->vm_mm), flush_tlb_page_ipi, &fd, 1);
677         } else {
678                 unsigned int cpu;
679
680                 for_each_online_cpu(cpu) {
681                         if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm))
682                                 cpu_context(cpu, vma->vm_mm) = 0;
683                 }
684                 local_flush_tlb_page(vma, page);
685         }
686         preempt_enable();
687 }
688 EXPORT_SYMBOL(flush_tlb_page);
689
/* Cross-call callback: @info carries the virtual address itself, not a pointer to it. */
static void flush_tlb_one_ipi(void *info)
{
        unsigned long vaddr = (unsigned long) info;

        local_flush_tlb_one(vaddr);
}
696
/*
 * Flush the TLB entry for @vaddr via on_each_cpu() with wait=1. The address
 * is passed by value, cast into the callback's void * argument.
 */
void flush_tlb_one(unsigned long vaddr)
{
        on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
}
EXPORT_SYMBOL(flush_tlb_one);