arm64: dts: qcom: sm8550: add TRNG node
[linux-modified.git] / kernel / trace / trace_osnoise.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * OS Noise Tracer: computes the OS Noise suffered by a running thread.
4  * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
5  *
6  * Based on "hwlat_detector" tracer by:
7  *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
8  *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
9  *   With feedback from Clark Williams <williams@redhat.com>
10  *
11  * And also based on the rtsl tracer presented on:
12  *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
13  *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
14  *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
15  *
16  * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
17  */
18
19 #include <linux/kthread.h>
20 #include <linux/tracefs.h>
21 #include <linux/uaccess.h>
22 #include <linux/cpumask.h>
23 #include <linux/delay.h>
24 #include <linux/sched/clock.h>
25 #include <uapi/linux/sched/types.h>
26 #include <linux/sched.h>
27 #include "trace.h"
28
29 #ifdef CONFIG_X86_LOCAL_APIC
30 #include <asm/trace/irq_vectors.h>
31 #undef TRACE_INCLUDE_PATH
32 #undef TRACE_INCLUDE_FILE
33 #endif /* CONFIG_X86_LOCAL_APIC */
34
35 #include <trace/events/irq.h>
36 #include <trace/events/sched.h>
37
38 #define CREATE_TRACE_POINTS
39 #include <trace/events/osnoise.h>
40
/*
 * Default tracer parameters.
 *
 * The trailing notes give the duration each value stands for; going by
 * those notes (1000000 == 1s, 1000 == 1ms) the time values are expressed
 * in microseconds.
 */
#define BANNER			"osnoise: "
#define DEFAULT_SAMPLE_PERIOD	1000000			/* 1s */
#define DEFAULT_SAMPLE_RUNTIME	1000000			/* 1s */

#define DEFAULT_TIMERLAT_PERIOD	1000			/* 1ms */
#define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */
50
/*
 * osnoise/options entries.
 *
 * OSN_MAX is not an option: it counts the entries and sizes the name table.
 */
enum osnoise_options_index {
	OSN_DEFAULTS = 0,
	OSN_WORKLOAD,
	OSN_PANIC_ON_STOP,
	OSN_PREEMPT_DISABLE,
	OSN_IRQ_DISABLE,
	OSN_MAX
};

/*
 * Name of each option, indexed by enum osnoise_options_index.
 */
static const char * const osnoise_options_str[OSN_MAX] = {
	[OSN_DEFAULTS]		= "DEFAULTS",
	[OSN_WORKLOAD]		= "OSNOISE_WORKLOAD",
	[OSN_PANIC_ON_STOP]	= "PANIC_ON_STOP",
	[OSN_PREEMPT_DISABLE]	= "OSNOISE_PREEMPT_DISABLE",
	[OSN_IRQ_DISABLE]	= "OSNOISE_IRQ_DISABLE",
};

/*
 * Initial value of the option word. 0x2 has only bit 1 set.
 * NOTE(review): presumably bits are indexed by enum osnoise_options_index,
 * which would make OSN_WORKLOAD the only default option - confirm against
 * the users of osnoise_options.
 */
#define OSN_DEFAULT_OPTIONS		0x2
static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
72
73 /*
74  * trace_array of the enabled osnoise/timerlat instances.
75  */
76 struct osnoise_instance {
77         struct list_head        list;
78         struct trace_array      *tr;
79 };
80
81 static struct list_head osnoise_instances;
82
83 static bool osnoise_has_registered_instances(void)
84 {
85         return !!list_first_or_null_rcu(&osnoise_instances,
86                                         struct osnoise_instance,
87                                         list);
88 }
89
90 /*
91  * osnoise_instance_registered - check if a tr is already registered
92  */
93 static int osnoise_instance_registered(struct trace_array *tr)
94 {
95         struct osnoise_instance *inst;
96         int found = 0;
97
98         rcu_read_lock();
99         list_for_each_entry_rcu(inst, &osnoise_instances, list) {
100                 if (inst->tr == tr)
101                         found = 1;
102         }
103         rcu_read_unlock();
104
105         return found;
106 }
107
108 /*
109  * osnoise_register_instance - register a new trace instance
110  *
111  * Register a trace_array *tr in the list of instances running
112  * osnoise/timerlat tracers.
113  */
114 static int osnoise_register_instance(struct trace_array *tr)
115 {
116         struct osnoise_instance *inst;
117
118         /*
119          * register/unregister serialization is provided by trace's
120          * trace_types_lock.
121          */
122         lockdep_assert_held(&trace_types_lock);
123
124         inst = kmalloc(sizeof(*inst), GFP_KERNEL);
125         if (!inst)
126                 return -ENOMEM;
127
128         INIT_LIST_HEAD_RCU(&inst->list);
129         inst->tr = tr;
130         list_add_tail_rcu(&inst->list, &osnoise_instances);
131
132         return 0;
133 }
134
135 /*
136  *  osnoise_unregister_instance - unregister a registered trace instance
137  *
138  * Remove the trace_array *tr from the list of instances running
139  * osnoise/timerlat tracers.
140  */
141 static void osnoise_unregister_instance(struct trace_array *tr)
142 {
143         struct osnoise_instance *inst;
144         int found = 0;
145
146         /*
147          * register/unregister serialization is provided by trace's
148          * trace_types_lock.
149          */
150         list_for_each_entry_rcu(inst, &osnoise_instances, list,
151                                 lockdep_is_held(&trace_types_lock)) {
152                 if (inst->tr == tr) {
153                         list_del_rcu(&inst->list);
154                         found = 1;
155                         break;
156                 }
157         }
158
159         if (!found)
160                 return;
161
162         kvfree_rcu_mightsleep(inst);
163 }
164
165 /*
166  * NMI runtime info.
167  */
168 struct osn_nmi {
169         u64     count;
170         u64     delta_start;
171 };
172
173 /*
174  * IRQ runtime info.
175  */
176 struct osn_irq {
177         u64     count;
178         u64     arrival_time;
179         u64     delta_start;
180 };
181
182 #define IRQ_CONTEXT     0
183 #define THREAD_CONTEXT  1
184 #define THREAD_URET     2
185 /*
186  * sofirq runtime info.
187  */
188 struct osn_softirq {
189         u64     count;
190         u64     arrival_time;
191         u64     delta_start;
192 };
193
194 /*
195  * thread runtime info.
196  */
197 struct osn_thread {
198         u64     count;
199         u64     arrival_time;
200         u64     delta_start;
201 };
202
203 /*
204  * Runtime information: this structure saves the runtime information used by
205  * one sampling thread.
206  */
207 struct osnoise_variables {
208         struct task_struct      *kthread;
209         bool                    sampling;
210         pid_t                   pid;
211         struct osn_nmi          nmi;
212         struct osn_irq          irq;
213         struct osn_softirq      softirq;
214         struct osn_thread       thread;
215         local_t                 int_counter;
216 };
217
218 /*
219  * Per-cpu runtime information.
220  */
221 static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
222
223 /*
224  * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
225  */
226 static inline struct osnoise_variables *this_cpu_osn_var(void)
227 {
228         return this_cpu_ptr(&per_cpu_osnoise_var);
229 }
230
231 #ifdef CONFIG_TIMERLAT_TRACER
232 /*
233  * Runtime information for the timer mode.
234  */
235 struct timerlat_variables {
236         struct task_struct      *kthread;
237         struct hrtimer          timer;
238         u64                     rel_period;
239         u64                     abs_period;
240         bool                    tracing_thread;
241         u64                     count;
242         bool                    uthread_migrate;
243 };
244
245 static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
246
247 /*
248  * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
249  */
250 static inline struct timerlat_variables *this_cpu_tmr_var(void)
251 {
252         return this_cpu_ptr(&per_cpu_timerlat_var);
253 }
254
255 /*
256  * tlat_var_reset - Reset the values of the given timerlat_variables
257  */
258 static inline void tlat_var_reset(void)
259 {
260         struct timerlat_variables *tlat_var;
261         int cpu;
262         /*
263          * So far, all the values are initialized as 0, so
264          * zeroing the structure is perfect.
265          */
266         for_each_cpu(cpu, cpu_online_mask) {
267                 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
268                 memset(tlat_var, 0, sizeof(*tlat_var));
269         }
270 }
271 #else /* CONFIG_TIMERLAT_TRACER */
/* Without the timerlat tracer there is nothing to reset. */
#define tlat_var_reset()	do {} while (0)
273 #endif /* CONFIG_TIMERLAT_TRACER */
274
275 /*
276  * osn_var_reset - Reset the values of the given osnoise_variables
277  */
278 static inline void osn_var_reset(void)
279 {
280         struct osnoise_variables *osn_var;
281         int cpu;
282
283         /*
284          * So far, all the values are initialized as 0, so
285          * zeroing the structure is perfect.
286          */
287         for_each_cpu(cpu, cpu_online_mask) {
288                 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
289                 memset(osn_var, 0, sizeof(*osn_var));
290         }
291 }
292
/*
 * osn_var_reset_all - Reset all per-cpu runtime variables
 *
 * Clears the osnoise variables first and then the timerlat ones (the
 * latter is a no-op when the timerlat tracer is not built).
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}
301
302 /*
303  * Tells NMIs to call back to the osnoise tracer to record timestamps.
304  */
305 bool trace_osnoise_callback_enabled;
306
307 /*
308  * osnoise sample structure definition. Used to store the statistics of a
309  * sample run.
310  */
311 struct osnoise_sample {
312         u64                     runtime;        /* runtime */
313         u64                     noise;          /* noise */
314         u64                     max_sample;     /* max single noise sample */
315         int                     hw_count;       /* # HW (incl. hypervisor) interference */
316         int                     nmi_count;      /* # NMIs during this sample */
317         int                     irq_count;      /* # IRQs during this sample */
318         int                     softirq_count;  /* # softirqs during this sample */
319         int                     thread_count;   /* # threads during this sample */
320 };
321
322 #ifdef CONFIG_TIMERLAT_TRACER
323 /*
324  * timerlat sample structure definition. Used to store the statistics of
325  * a sample run.
326  */
327 struct timerlat_sample {
328         u64                     timer_latency;  /* timer_latency */
329         unsigned int            seqnum;         /* unique sequence */
330         int                     context;        /* timer context */
331 };
332 #endif
333
334 /*
335  * Protect the interface.
336  */
337 static struct mutex interface_lock;
338
339 /*
340  * Tracer data.
341  */
342 static struct osnoise_data {
343         u64     sample_period;          /* total sampling period */
344         u64     sample_runtime;         /* active sampling portion of period */
345         u64     stop_tracing;           /* stop trace in the internal operation (loop/irq) */
346         u64     stop_tracing_total;     /* stop trace in the final operation (report/thread) */
347 #ifdef CONFIG_TIMERLAT_TRACER
348         u64     timerlat_period;        /* timerlat period */
349         u64     print_stack;            /* print IRQ stack if total > */
350         int     timerlat_tracer;        /* timerlat tracer */
351 #endif
352         bool    tainted;                /* infor users and developers about a problem */
353 } osnoise_data = {
354         .sample_period                  = DEFAULT_SAMPLE_PERIOD,
355         .sample_runtime                 = DEFAULT_SAMPLE_RUNTIME,
356         .stop_tracing                   = 0,
357         .stop_tracing_total             = 0,
358 #ifdef CONFIG_TIMERLAT_TRACER
359         .print_stack                    = 0,
360         .timerlat_period                = DEFAULT_TIMERLAT_PERIOD,
361         .timerlat_tracer                = 0,
362 #endif
363 };
364
365 #ifdef CONFIG_TIMERLAT_TRACER
366 static inline bool timerlat_enabled(void)
367 {
368         return osnoise_data.timerlat_tracer;
369 }
370
371 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
372 {
373         struct timerlat_variables *tlat_var = this_cpu_tmr_var();
374         /*
375          * If the timerlat is enabled, but the irq handler did
376          * not run yet enabling timerlat_tracer, do not trace.
377          */
378         if (!tlat_var->tracing_thread) {
379                 osn_var->softirq.arrival_time = 0;
380                 osn_var->softirq.delta_start = 0;
381                 return 0;
382         }
383         return 1;
384 }
385
386 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
387 {
388         struct timerlat_variables *tlat_var = this_cpu_tmr_var();
389         /*
390          * If the timerlat is enabled, but the irq handler did
391          * not run yet enabling timerlat_tracer, do not trace.
392          */
393         if (!tlat_var->tracing_thread) {
394                 osn_var->thread.delta_start = 0;
395                 osn_var->thread.arrival_time = 0;
396                 return 0;
397         }
398         return 1;
399 }
400 #else /* CONFIG_TIMERLAT_TRACER */
401 static inline bool timerlat_enabled(void)
402 {
403         return false;
404 }
405
406 static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
407 {
408         return 1;
409 }
410 static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
411 {
412         return 1;
413 }
414 #endif
415
416 #ifdef CONFIG_PREEMPT_RT
417 /*
418  * Print the osnoise header info.
419  */
420 static void print_osnoise_headers(struct seq_file *s)
421 {
422         if (osnoise_data.tainted)
423                 seq_puts(s, "# osnoise is tainted!\n");
424
425         seq_puts(s, "#                                _-------=> irqs-off\n");
426         seq_puts(s, "#                               / _------=> need-resched\n");
427         seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
428         seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
429         seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
430         seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
431         seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
432
433         seq_puts(s, "#                              |||||| /          ");
434         seq_puts(s, "                                     MAX\n");
435
436         seq_puts(s, "#                              ||||| /                         ");
437         seq_puts(s, "                    SINGLE      Interference counters:\n");
438
439         seq_puts(s, "#                              |||||||               RUNTIME   ");
440         seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
441
442         seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
443         seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
444
445         seq_puts(s, "#              | |         |   |||||||      |           |      ");
446         seq_puts(s, "       |    |            |      |      |      |      |      |\n");
447 }
448 #else /* CONFIG_PREEMPT_RT */
449 static void print_osnoise_headers(struct seq_file *s)
450 {
451         if (osnoise_data.tainted)
452                 seq_puts(s, "# osnoise is tainted!\n");
453
454         seq_puts(s, "#                                _-----=> irqs-off\n");
455         seq_puts(s, "#                               / _----=> need-resched\n");
456         seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
457         seq_puts(s, "#                              || / _--=> preempt-depth\n");
458         seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
459         seq_puts(s, "                    MAX\n");
460         seq_puts(s, "#                              |||| /     delay               ");
461         seq_puts(s, "                    SINGLE      Interference counters:\n");
462
463         seq_puts(s, "#                              |||||               RUNTIME   ");
464         seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
465
466         seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
467         seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
468
469         seq_puts(s, "#              | |         |   |||||      |           |      ");
470         seq_puts(s, "       |    |            |      |      |      |      |      |\n");
471 }
472 #endif /* CONFIG_PREEMPT_RT */
473
/*
 * osnoise_taint - report an osnoise error.
 *
 * Print msg into the buffer of every registered instance and flag the
 * tracer as tainted. msg is handed to trace_array_printk_buf() in the
 * format position, so it has to be a format string.
 */
#define osnoise_taint(msg) ({							\
	struct osnoise_instance *osn_inst;					\
										\
	rcu_read_lock();							\
	list_for_each_entry_rcu(osn_inst, &osnoise_instances, list)		\
		trace_array_printk_buf(osn_inst->tr->array_buffer.buffer,	\
				       _THIS_IP_, msg);				\
	rcu_read_unlock();							\
	osnoise_data.tainted = true;						\
})
489
490 /*
491  * Record an osnoise_sample into the tracer buffer.
492  */
493 static void
494 __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
495 {
496         struct trace_event_call *call = &event_osnoise;
497         struct ring_buffer_event *event;
498         struct osnoise_entry *entry;
499
500         event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
501                                           tracing_gen_ctx());
502         if (!event)
503                 return;
504         entry   = ring_buffer_event_data(event);
505         entry->runtime          = sample->runtime;
506         entry->noise            = sample->noise;
507         entry->max_sample       = sample->max_sample;
508         entry->hw_count         = sample->hw_count;
509         entry->nmi_count        = sample->nmi_count;
510         entry->irq_count        = sample->irq_count;
511         entry->softirq_count    = sample->softirq_count;
512         entry->thread_count     = sample->thread_count;
513
514         if (!call_filter_check_discard(call, entry, buffer, event))
515                 trace_buffer_unlock_commit_nostack(buffer, event);
516 }
517
518 /*
519  * Record an osnoise_sample on all osnoise instances.
520  */
521 static void trace_osnoise_sample(struct osnoise_sample *sample)
522 {
523         struct osnoise_instance *inst;
524         struct trace_buffer *buffer;
525
526         rcu_read_lock();
527         list_for_each_entry_rcu(inst, &osnoise_instances, list) {
528                 buffer = inst->tr->array_buffer.buffer;
529                 __trace_osnoise_sample(sample, buffer);
530         }
531         rcu_read_unlock();
532 }
533
534 #ifdef CONFIG_TIMERLAT_TRACER
535 /*
536  * Print the timerlat header info.
537  */
538 #ifdef CONFIG_PREEMPT_RT
/*
 * PREEMPT_RT layout of the timerlat column legend. The pieces are written
 * in order with seq_puts(); a piece without a trailing newline is
 * continued by the next one.
 */
static void print_timerlat_headers(struct seq_file *s)
{
	static const char * const header[] = {
		"#                                _-------=> irqs-off\n",
		"#                               / _------=> need-resched\n",
		"#                              | / _-----=> need-resched-lazy\n",
		"#                              || / _----=> hardirq/softirq\n",
		"#                              ||| / _---=> preempt-depth\n",
		"#                              |||| / _--=> preempt-lazy-depth\n",
		"#                              ||||| / _-=> migrate-disable\n",
		"#                              |||||| /\n",
		"#                              |||||||             ACTIVATION\n",
		"#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ",
		"       CONTEXT                LATENCY\n",
		"#              | |         |   |||||||      |         |      ",
		"            |                       |\n",
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(header); i++)
		seq_puts(s, header[i]);
}
555 #else /* CONFIG_PREEMPT_RT */
/*
 * Non PREEMPT_RT layout of the timerlat column legend. The pieces are
 * written in order with seq_puts(); a piece without a trailing newline is
 * continued by the next one.
 */
static void print_timerlat_headers(struct seq_file *s)
{
	static const char * const header[] = {
		"#                                _-----=> irqs-off\n",
		"#                               / _----=> need-resched\n",
		"#                              | / _---=> hardirq/softirq\n",
		"#                              || / _--=> preempt-depth\n",
		"#                              ||| / _-=> migrate-disable\n",
		"#                              |||| /     delay\n",
		"#                              |||||            ACTIVATION\n",
		"#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ",
		"      CONTEXT                 LATENCY\n",
		"#              | |         |   |||||      |         |      ",
		"            |                       |\n",
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(header); i++)
		seq_puts(s, header[i]);
}
570 #endif /* CONFIG_PREEMPT_RT */
571
572 static void
573 __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
574 {
575         struct trace_event_call *call = &event_osnoise;
576         struct ring_buffer_event *event;
577         struct timerlat_entry *entry;
578
579         event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
580                                           tracing_gen_ctx());
581         if (!event)
582                 return;
583         entry   = ring_buffer_event_data(event);
584         entry->seqnum                   = sample->seqnum;
585         entry->context                  = sample->context;
586         entry->timer_latency            = sample->timer_latency;
587
588         if (!call_filter_check_discard(call, entry, buffer, event))
589                 trace_buffer_unlock_commit_nostack(buffer, event);
590 }
591
592 /*
593  * Record an timerlat_sample into the tracer buffer.
594  */
595 static void trace_timerlat_sample(struct timerlat_sample *sample)
596 {
597         struct osnoise_instance *inst;
598         struct trace_buffer *buffer;
599
600         rcu_read_lock();
601         list_for_each_entry_rcu(inst, &osnoise_instances, list) {
602                 buffer = inst->tr->array_buffer.buffer;
603                 __trace_timerlat_sample(sample, buffer);
604         }
605         rcu_read_unlock();
606 }
607
608 #ifdef CONFIG_STACKTRACE
609
610 #define MAX_CALLS       256
611
612 /*
613  * Stack trace will take place only at IRQ level, so, no need
614  * to control nesting here.
615  */
616 struct trace_stack {
617         int             stack_size;
618         int             nr_entries;
619         unsigned long   calls[MAX_CALLS];
620 };
621
622 static DEFINE_PER_CPU(struct trace_stack, trace_stack);
623
624 /*
625  * timerlat_save_stack - save a stack trace without printing
626  *
627  * Save the current stack trace without printing. The
628  * stack will be printed later, after the end of the measurement.
629  */
630 static void timerlat_save_stack(int skip)
631 {
632         unsigned int size, nr_entries;
633         struct trace_stack *fstack;
634
635         fstack = this_cpu_ptr(&trace_stack);
636
637         size = ARRAY_SIZE(fstack->calls);
638
639         nr_entries = stack_trace_save(fstack->calls, size, skip);
640
641         fstack->stack_size = nr_entries * sizeof(unsigned long);
642         fstack->nr_entries = nr_entries;
643
644         return;
645
646 }
647
648 static void
649 __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
650 {
651         struct trace_event_call *call = &event_osnoise;
652         struct ring_buffer_event *event;
653         struct stack_entry *entry;
654
655         event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
656                                           tracing_gen_ctx());
657         if (!event)
658                 return;
659
660         entry = ring_buffer_event_data(event);
661
662         memcpy(&entry->caller, fstack->calls, size);
663         entry->size = fstack->nr_entries;
664
665         if (!call_filter_check_discard(call, entry, buffer, event))
666                 trace_buffer_unlock_commit_nostack(buffer, event);
667 }
668
669 /*
670  * timerlat_dump_stack - dump a stack trace previously saved
671  */
672 static void timerlat_dump_stack(u64 latency)
673 {
674         struct osnoise_instance *inst;
675         struct trace_buffer *buffer;
676         struct trace_stack *fstack;
677         unsigned int size;
678
679         /*
680          * trace only if latency > print_stack config, if enabled.
681          */
682         if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
683                 return;
684
685         preempt_disable_notrace();
686         fstack = this_cpu_ptr(&trace_stack);
687         size = fstack->stack_size;
688
689         rcu_read_lock();
690         list_for_each_entry_rcu(inst, &osnoise_instances, list) {
691                 buffer = inst->tr->array_buffer.buffer;
692                 __timerlat_dump_stack(buffer, fstack, size);
693
694         }
695         rcu_read_unlock();
696         preempt_enable_notrace();
697 }
698 #else /* CONFIG_STACKTRACE */
/*
 * Without CONFIG_STACKTRACE there is no stack to save or dump: both
 * operations compile to nothing. Macro parameters are plain identifiers,
 * they do not carry a type.
 */
#define timerlat_dump_stack(latency) do {} while (0)
#define timerlat_save_stack(a) do {} while (0)
701 #endif /* CONFIG_STACKTRACE */
702 #endif /* CONFIG_TIMERLAT_TRACER */
703
/*
 * Macros to encapsulate the time capturing infrastructure:
 *
 *   time_get()      - read the current time from trace_clock_local()
 *   time_to_us(x)   - divide x by 1000
 *   time_sub(a, b)  - a minus b
 */
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
710
711 /*
712  * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
713  *
714  * If an IRQ is preempted by an NMI, its delta_start is pushed forward
715  * to discount the NMI interference.
716  *
717  * See get_int_safe_duration().
718  */
719 static inline void
720 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
721 {
722         if (osn_var->irq.delta_start)
723                 osn_var->irq.delta_start += duration;
724 }
725
726 #ifndef CONFIG_PREEMPT_RT
727 /*
728  * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
729  *
730  * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
731  * forward to discount the interference.
732  *
733  * See get_int_safe_duration().
734  */
735 static inline void
736 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
737 {
738         if (osn_var->softirq.delta_start)
739                 osn_var->softirq.delta_start += duration;
740 }
741 #else /* CONFIG_PREEMPT_RT */
/* On PREEMPT_RT the softirq delta_start is never moved. */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
743 #endif
744
745 /*
746  * cond_move_thread_delta_start - Forward the delta_start of a running thread
747  *
748  * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start
749  * is pushed forward to discount the interference.
750  *
751  * See get_int_safe_duration().
752  */
753 static inline void
754 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
755 {
756         if (osn_var->thread.delta_start)
757                 osn_var->thread.delta_start += duration;
758 }
759
760 /*
761  * get_int_safe_duration - Get the duration of a window
762  *
763  * The irq, softirq and thread varaibles need to have its duration without
764  * the interference from higher priority interrupts. Instead of keeping a
765  * variable to discount the interrupt interference from these variables, the
766  * starting time of these variables are pushed forward with the interrupt's
767  * duration. In this way, a single variable is used to:
768  *
769  *   - Know if a given window is being measured.
770  *   - Account its duration.
771  *   - Discount the interference.
772  *
773  * To avoid getting inconsistent values, e.g.,:
774  *
775  *      now = time_get()
776  *              --->    interrupt!
777  *                      delta_start -= int duration;
778  *              <---
779  *      duration = now - delta_start;
780  *
781  *      result: negative duration if the variable duration before the
782  *      interrupt was smaller than the interrupt execution.
783  *
784  * A counter of interrupts is used. If the counter increased, try
785  * to capture an interference safe duration.
786  */
787 static inline s64
788 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
789 {
790         u64 int_counter, now;
791         s64 duration;
792
793         do {
794                 int_counter = local_read(&osn_var->int_counter);
795                 /* synchronize with interrupts */
796                 barrier();
797
798                 now = time_get();
799                 duration = (now - *delta_start);
800
801                 /* synchronize with interrupts */
802                 barrier();
803         } while (int_counter != local_read(&osn_var->int_counter));
804
805         /*
806          * This is an evidence of race conditions that cause
807          * a value to be "discounted" too much.
808          */
809         if (duration < 0)
810                 osnoise_taint("Negative duration!\n");
811
812         *delta_start = 0;
813
814         return duration;
815 }
816
817 /*
818  *
819  * set_int_safe_time - Save the current time on *time, aware of interference
820  *
821  * Get the time, taking into consideration a possible interference from
822  * higher priority interrupts.
823  *
824  * See get_int_safe_duration() for an explanation.
825  */
826 static u64
827 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
828 {
829         u64 int_counter;
830
831         do {
832                 int_counter = local_read(&osn_var->int_counter);
833                 /* synchronize with interrupts */
834                 barrier();
835
836                 *time = time_get();
837
838                 /* synchronize with interrupts */
839                 barrier();
840         } while (int_counter != local_read(&osn_var->int_counter));
841
842         return int_counter;
843 }
844
845 #ifdef CONFIG_TIMERLAT_TRACER
846 /*
847  * copy_int_safe_time - Copy *src into *desc aware of interference
848  */
849 static u64
850 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
851 {
852         u64 int_counter;
853
854         do {
855                 int_counter = local_read(&osn_var->int_counter);
856                 /* synchronize with interrupts */
857                 barrier();
858
859                 *dst = *src;
860
861                 /* synchronize with interrupts */
862                 barrier();
863         } while (int_counter != local_read(&osn_var->int_counter));
864
865         return int_counter;
866 }
867 #endif /* CONFIG_TIMERLAT_TRACER */
868
869 /*
870  * trace_osnoise_callback - NMI entry/exit callback
871  *
872  * This function is called at the entry and exit NMI code. The bool enter
873  * distinguishes between either case. This function is used to note a NMI
874  * occurrence, compute the noise caused by the NMI, and to remove the noise
875  * it is potentially causing on other interference variables.
876  */
877 void trace_osnoise_callback(bool enter)
878 {
879         struct osnoise_variables *osn_var = this_cpu_osn_var();
880         u64 duration;
881
882         if (!osn_var->sampling)
883                 return;
884
885         /*
886          * Currently trace_clock_local() calls sched_clock() and the
887          * generic version is not NMI safe.
888          */
889         if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
890                 if (enter) {
891                         osn_var->nmi.delta_start = time_get();
892                         local_inc(&osn_var->int_counter);
893                 } else {
894                         duration = time_get() - osn_var->nmi.delta_start;
895
896                         trace_nmi_noise(osn_var->nmi.delta_start, duration);
897
898                         cond_move_irq_delta_start(osn_var, duration);
899                         cond_move_softirq_delta_start(osn_var, duration);
900                         cond_move_thread_delta_start(osn_var, duration);
901                 }
902         }
903
904         if (enter)
905                 osn_var->nmi.count++;
906 }
907
908 /*
909  * osnoise_trace_irq_entry - Note the starting of an IRQ
910  *
911  * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
912  * it is safe to use a single variable (ons_var->irq) to save the statistics.
913  * The arrival_time is used to report... the arrival time. The delta_start
914  * is used to compute the duration at the IRQ exit handler. See
915  * cond_move_irq_delta_start().
916  */
917 void osnoise_trace_irq_entry(int id)
918 {
919         struct osnoise_variables *osn_var = this_cpu_osn_var();
920
921         if (!osn_var->sampling)
922                 return;
923         /*
924          * This value will be used in the report, but not to compute
925          * the execution time, so it is safe to get it unsafe.
926          */
927         osn_var->irq.arrival_time = time_get();
928         set_int_safe_time(osn_var, &osn_var->irq.delta_start);
929         osn_var->irq.count++;
930
931         local_inc(&osn_var->int_counter);
932 }
933
934 /*
935  * osnoise_irq_exit - Note the end of an IRQ, sava data and trace
936  *
937  * Computes the duration of the IRQ noise, and trace it. Also discounts the
938  * interference from other sources of noise could be currently being accounted.
939  */
940 void osnoise_trace_irq_exit(int id, const char *desc)
941 {
942         struct osnoise_variables *osn_var = this_cpu_osn_var();
943         s64 duration;
944
945         if (!osn_var->sampling)
946                 return;
947
948         duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
949         trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
950         osn_var->irq.arrival_time = 0;
951         cond_move_softirq_delta_start(osn_var, duration);
952         cond_move_thread_delta_start(osn_var, duration);
953 }
954
/*
 * trace_irqentry_callback - Callback for the irq_handler_entry tracepoint
 *
 * Notes the start of an IRQ occurrence by forwarding the IRQ number to
 * osnoise_trace_irq_entry(). @data and @action are not used.
 */
static void
trace_irqentry_callback(void *data, int irq, struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}
965
966 /*
967  * trace_irqexit_callback - Callback to the irq:irq_exit traceevent
968  *
969  * Used to note the end of an IRQ occurece.
970  */
971 static void trace_irqexit_callback(void *data, int irq,
972                                    struct irqaction *action, int ret)
973 {
974         osnoise_trace_irq_exit(irq, action->name);
975 }
976
977 /*
978  * arch specific register function.
979  */
980 int __weak osnoise_arch_register(void)
981 {
982         return 0;
983 }
984
985 /*
986  * arch specific unregister function.
987  */
988 void __weak osnoise_arch_unregister(void)
989 {
990         return;
991 }
992
993 /*
994  * hook_irq_events - Hook IRQ handling events
995  *
996  * This function hooks the IRQ related callbacks to the respective trace
997  * events.
998  */
999 static int hook_irq_events(void)
1000 {
1001         int ret;
1002
1003         ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1004         if (ret)
1005                 goto out_err;
1006
1007         ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
1008         if (ret)
1009                 goto out_unregister_entry;
1010
1011         ret = osnoise_arch_register();
1012         if (ret)
1013                 goto out_irq_exit;
1014
1015         return 0;
1016
1017 out_irq_exit:
1018         unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
1019 out_unregister_entry:
1020         unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1021 out_err:
1022         return -EINVAL;
1023 }
1024
1025 /*
1026  * unhook_irq_events - Unhook IRQ handling events
1027  *
1028  * This function unhooks the IRQ related callbacks to the respective trace
1029  * events.
1030  */
1031 static void unhook_irq_events(void)
1032 {
1033         osnoise_arch_unregister();
1034         unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
1035         unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1036 }
1037
1038 #ifndef CONFIG_PREEMPT_RT
1039 /*
1040  * trace_softirq_entry_callback - Note the starting of a softirq
1041  *
1042  * Save the starting time of a softirq. As softirqs are non-preemptive to
1043  * other softirqs, it is safe to use a single variable (ons_var->softirq)
1044  * to save the statistics. The arrival_time is used to report... the
1045  * arrival time. The delta_start is used to compute the duration at the
1046  * softirq exit handler. See cond_move_softirq_delta_start().
1047  */
1048 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
1049 {
1050         struct osnoise_variables *osn_var = this_cpu_osn_var();
1051
1052         if (!osn_var->sampling)
1053                 return;
1054         /*
1055          * This value will be used in the report, but not to compute
1056          * the execution time, so it is safe to get it unsafe.
1057          */
1058         osn_var->softirq.arrival_time = time_get();
1059         set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
1060         osn_var->softirq.count++;
1061
1062         local_inc(&osn_var->int_counter);
1063 }
1064
1065 /*
1066  * trace_softirq_exit_callback - Note the end of an softirq
1067  *
1068  * Computes the duration of the softirq noise, and trace it. Also discounts the
1069  * interference from other sources of noise could be currently being accounted.
1070  */
1071 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
1072 {
1073         struct osnoise_variables *osn_var = this_cpu_osn_var();
1074         s64 duration;
1075
1076         if (!osn_var->sampling)
1077                 return;
1078
1079         if (unlikely(timerlat_enabled()))
1080                 if (!timerlat_softirq_exit(osn_var))
1081                         return;
1082
1083         duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
1084         trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
1085         cond_move_thread_delta_start(osn_var, duration);
1086         osn_var->softirq.arrival_time = 0;
1087 }
1088
1089 /*
1090  * hook_softirq_events - Hook softirq handling events
1091  *
1092  * This function hooks the softirq related callbacks to the respective trace
1093  * events.
1094  */
1095 static int hook_softirq_events(void)
1096 {
1097         int ret;
1098
1099         ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1100         if (ret)
1101                 goto out_err;
1102
1103         ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1104         if (ret)
1105                 goto out_unreg_entry;
1106
1107         return 0;
1108
1109 out_unreg_entry:
1110         unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1111 out_err:
1112         return -EINVAL;
1113 }
1114
1115 /*
1116  * unhook_softirq_events - Unhook softirq handling events
1117  *
1118  * This function hooks the softirq related callbacks to the respective trace
1119  * events.
1120  */
1121 static void unhook_softirq_events(void)
1122 {
1123         unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1124         unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1125 }
1126 #else /* CONFIG_PREEMPT_RT */
/*
 * Softirqs are threads in PREEMPT_RT mode, so there are no softirq events
 * to hook: both helpers are no-ops and hooking always succeeds.
 */
static int hook_softirq_events(void)
{
	/* Nothing to register. */
	return 0;
}

static void unhook_softirq_events(void)
{
	/* Nothing to unregister. */
}
1137 #endif
1138
1139 /*
1140  * thread_entry - Record the starting of a thread noise window
1141  *
1142  * It saves the context switch time for a noisy thread, and increments
1143  * the interference counters.
1144  */
1145 static void
1146 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
1147 {
1148         if (!osn_var->sampling)
1149                 return;
1150         /*
1151          * The arrival time will be used in the report, but not to compute
1152          * the execution time, so it is safe to get it unsafe.
1153          */
1154         osn_var->thread.arrival_time = time_get();
1155
1156         set_int_safe_time(osn_var, &osn_var->thread.delta_start);
1157
1158         osn_var->thread.count++;
1159         local_inc(&osn_var->int_counter);
1160 }
1161
1162 /*
1163  * thread_exit - Report the end of a thread noise window
1164  *
1165  * It computes the total noise from a thread, tracing if needed.
1166  */
1167 static void
1168 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
1169 {
1170         s64 duration;
1171
1172         if (!osn_var->sampling)
1173                 return;
1174
1175         if (unlikely(timerlat_enabled()))
1176                 if (!timerlat_thread_exit(osn_var))
1177                         return;
1178
1179         duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);
1180
1181         trace_thread_noise(t, osn_var->thread.arrival_time, duration);
1182
1183         osn_var->thread.arrival_time = 0;
1184 }
1185
1186 #ifdef CONFIG_TIMERLAT_TRACER
1187 /*
1188  * osnoise_stop_exception - Stop tracing and the tracer.
1189  */
1190 static __always_inline void osnoise_stop_exception(char *msg, int cpu)
1191 {
1192         struct osnoise_instance *inst;
1193         struct trace_array *tr;
1194
1195         rcu_read_lock();
1196         list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1197                 tr = inst->tr;
1198                 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1199                                        "stop tracing hit on cpu %d due to exception: %s\n",
1200                                        smp_processor_id(),
1201                                        msg);
1202
1203                 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1204                         panic("tracer hit on cpu %d due to exception: %s\n",
1205                               smp_processor_id(),
1206                               msg);
1207
1208                 tracer_tracing_off(tr);
1209         }
1210         rcu_read_unlock();
1211 }
1212
1213 /*
1214  * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
1215  *
1216  * his function is hooked to the sched:sched_migrate_task trace event, and monitors
1217  * timerlat user-space thread migration.
1218  */
1219 static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
1220 {
1221         struct osnoise_variables *osn_var;
1222         long cpu = task_cpu(p);
1223
1224         osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
1225         if (osn_var->pid == p->pid && dest_cpu != cpu) {
1226                 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
1227                 osnoise_taint("timerlat user-thread migrated\n");
1228                 osnoise_stop_exception("timerlat user-thread migrated", cpu);
1229         }
1230 }
1231
1232 static int register_migration_monitor(void)
1233 {
1234         int ret = 0;
1235
1236         /*
1237          * Timerlat thread migration check is only required when running timerlat in user-space.
1238          * Thus, enable callback only if timerlat is set with no workload.
1239          */
1240         if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options))
1241                 ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1242
1243         return ret;
1244 }
1245
1246 static void unregister_migration_monitor(void)
1247 {
1248         if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options))
1249                 unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1250 }
1251 #else
/*
 * Without CONFIG_TIMERLAT_TRACER there is no migration monitor: registering
 * always succeeds and unregistering does nothing.
 */
static int register_migration_monitor(void)
{
	/* Nothing to register. */
	return 0;
}

static void unregister_migration_monitor(void)
{
	/* Nothing to unregister. */
}
1257 #endif
1258 /*
1259  * trace_sched_switch - sched:sched_switch trace event handler
1260  *
1261  * This function is hooked to the sched:sched_switch trace event, and it is
1262  * used to record the beginning and to report the end of a thread noise window.
1263  */
1264 static void
1265 trace_sched_switch_callback(void *data, bool preempt,
1266                             struct task_struct *p,
1267                             struct task_struct *n,
1268                             unsigned int prev_state)
1269 {
1270         struct osnoise_variables *osn_var = this_cpu_osn_var();
1271         int workload = test_bit(OSN_WORKLOAD, &osnoise_options);
1272
1273         if ((p->pid != osn_var->pid) || !workload)
1274                 thread_exit(osn_var, p);
1275
1276         if ((n->pid != osn_var->pid) || !workload)
1277                 thread_entry(osn_var, n);
1278 }
1279
1280 /*
1281  * hook_thread_events - Hook the instrumentation for thread noise
1282  *
1283  * Hook the osnoise tracer callbacks to handle the noise from other
1284  * threads on the necessary kernel events.
1285  */
1286 static int hook_thread_events(void)
1287 {
1288         int ret;
1289
1290         ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
1291         if (ret)
1292                 return -EINVAL;
1293
1294         ret = register_migration_monitor();
1295         if (ret)
1296                 goto out_unreg;
1297
1298         return 0;
1299
1300 out_unreg:
1301         unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1302         return -EINVAL;
1303 }
1304
1305 /*
1306  * unhook_thread_events - unhook the instrumentation for thread noise
1307  *
1308  * Unook the osnoise tracer callbacks to handle the noise from other
1309  * threads on the necessary kernel events.
1310  */
1311 static void unhook_thread_events(void)
1312 {
1313         unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1314         unregister_migration_monitor();
1315 }
1316
1317 /*
1318  * save_osn_sample_stats - Save the osnoise_sample statistics
1319  *
1320  * Save the osnoise_sample statistics before the sampling phase. These
1321  * values will be used later to compute the diff betwneen the statistics
1322  * before and after the osnoise sampling.
1323  */
1324 static void
1325 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1326 {
1327         s->nmi_count = osn_var->nmi.count;
1328         s->irq_count = osn_var->irq.count;
1329         s->softirq_count = osn_var->softirq.count;
1330         s->thread_count = osn_var->thread.count;
1331 }
1332
1333 /*
1334  * diff_osn_sample_stats - Compute the osnoise_sample statistics
1335  *
1336  * After a sample period, compute the difference on the osnoise_sample
1337  * statistics. The struct osnoise_sample *s contains the statistics saved via
1338  * save_osn_sample_stats() before the osnoise sampling.
1339  */
1340 static void
1341 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1342 {
1343         s->nmi_count = osn_var->nmi.count - s->nmi_count;
1344         s->irq_count = osn_var->irq.count - s->irq_count;
1345         s->softirq_count = osn_var->softirq.count - s->softirq_count;
1346         s->thread_count = osn_var->thread.count - s->thread_count;
1347 }
1348
1349 /*
1350  * osnoise_stop_tracing - Stop tracing and the tracer.
1351  */
1352 static __always_inline void osnoise_stop_tracing(void)
1353 {
1354         struct osnoise_instance *inst;
1355         struct trace_array *tr;
1356
1357         rcu_read_lock();
1358         list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1359                 tr = inst->tr;
1360                 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1361                                 "stop tracing hit on cpu %d\n", smp_processor_id());
1362
1363                 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1364                         panic("tracer hit stop condition on CPU %d\n", smp_processor_id());
1365
1366                 tracer_tracing_off(tr);
1367         }
1368         rcu_read_unlock();
1369 }
1370
1371 /*
1372  * osnoise_has_tracing_on - Check if there is at least one instance on
1373  */
1374 static __always_inline int osnoise_has_tracing_on(void)
1375 {
1376         struct osnoise_instance *inst;
1377         int trace_is_on = 0;
1378
1379         rcu_read_lock();
1380         list_for_each_entry_rcu(inst, &osnoise_instances, list)
1381                 trace_is_on += tracer_tracing_is_on(inst->tr);
1382         rcu_read_unlock();
1383
1384         return trace_is_on;
1385 }
1386
1387 /*
1388  * notify_new_max_latency - Notify a new max latency via fsnotify interface.
1389  */
1390 static void notify_new_max_latency(u64 latency)
1391 {
1392         struct osnoise_instance *inst;
1393         struct trace_array *tr;
1394
1395         rcu_read_lock();
1396         list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1397                 tr = inst->tr;
1398                 if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
1399                         tr->max_latency = latency;
1400                         latency_fsnotify(tr);
1401                 }
1402         }
1403         rcu_read_unlock();
1404 }
1405
1406 /*
1407  * run_osnoise - Sample the time and look for osnoise
1408  *
1409  * Used to capture the time, looking for potential osnoise latency repeatedly.
1410  * Different from hwlat_detector, it is called with preemption and interrupts
1411  * enabled. This allows irqs, softirqs and threads to run, interfering on the
1412  * osnoise sampling thread, as they would do with a regular thread.
1413  */
1414 static int run_osnoise(void)
1415 {
1416         bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
1417         struct osnoise_variables *osn_var = this_cpu_osn_var();
1418         u64 start, sample, last_sample;
1419         u64 last_int_count, int_count;
1420         s64 noise = 0, max_noise = 0;
1421         s64 total, last_total = 0;
1422         struct osnoise_sample s;
1423         bool disable_preemption;
1424         unsigned int threshold;
1425         u64 runtime, stop_in;
1426         u64 sum_noise = 0;
1427         int hw_count = 0;
1428         int ret = -1;
1429
1430         /*
1431          * Disabling preemption is only required if IRQs are enabled,
1432          * and the options is set on.
1433          */
1434         disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);
1435
1436         /*
1437          * Considers the current thread as the workload.
1438          */
1439         osn_var->pid = current->pid;
1440
1441         /*
1442          * Save the current stats for the diff
1443          */
1444         save_osn_sample_stats(osn_var, &s);
1445
1446         /*
1447          * if threshold is 0, use the default value of 5 us.
1448          */
1449         threshold = tracing_thresh ? : 5000;
1450
1451         /*
1452          * Apply PREEMPT and IRQ disabled options.
1453          */
1454         if (disable_irq)
1455                 local_irq_disable();
1456
1457         if (disable_preemption)
1458                 preempt_disable();
1459
1460         /*
1461          * Make sure NMIs see sampling first
1462          */
1463         osn_var->sampling = true;
1464         barrier();
1465
1466         /*
1467          * Transform the *_us config to nanoseconds to avoid the
1468          * division on the main loop.
1469          */
1470         runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
1471         stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;
1472
1473         /*
1474          * Start timestemp
1475          */
1476         start = time_get();
1477
1478         /*
1479          * "previous" loop.
1480          */
1481         last_int_count = set_int_safe_time(osn_var, &last_sample);
1482
1483         do {
1484                 /*
1485                  * Get sample!
1486                  */
1487                 int_count = set_int_safe_time(osn_var, &sample);
1488
1489                 noise = time_sub(sample, last_sample);
1490
1491                 /*
1492                  * This shouldn't happen.
1493                  */
1494                 if (noise < 0) {
1495                         osnoise_taint("negative noise!");
1496                         goto out;
1497                 }
1498
1499                 /*
1500                  * Sample runtime.
1501                  */
1502                 total = time_sub(sample, start);
1503
1504                 /*
1505                  * Check for possible overflows.
1506                  */
1507                 if (total < last_total) {
1508                         osnoise_taint("total overflow!");
1509                         break;
1510                 }
1511
1512                 last_total = total;
1513
1514                 if (noise >= threshold) {
1515                         int interference = int_count - last_int_count;
1516
1517                         if (noise > max_noise)
1518                                 max_noise = noise;
1519
1520                         if (!interference)
1521                                 hw_count++;
1522
1523                         sum_noise += noise;
1524
1525                         trace_sample_threshold(last_sample, noise, interference);
1526
1527                         if (osnoise_data.stop_tracing)
1528                                 if (noise > stop_in)
1529                                         osnoise_stop_tracing();
1530                 }
1531
1532                 /*
1533                  * In some cases, notably when running on a nohz_full CPU with
1534                  * a stopped tick PREEMPT_RCU has no way to account for QSs.
1535                  * This will eventually cause unwarranted noise as PREEMPT_RCU
1536                  * will force preemption as the means of ending the current
1537                  * grace period. We avoid this problem by calling
1538                  * rcu_momentary_dyntick_idle(), which performs a zero duration
1539                  * EQS allowing PREEMPT_RCU to end the current grace period.
1540                  * This call shouldn't be wrapped inside an RCU critical
1541                  * section.
1542                  *
1543                  * Note that in non PREEMPT_RCU kernels QSs are handled through
1544                  * cond_resched()
1545                  */
1546                 if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
1547                         if (!disable_irq)
1548                                 local_irq_disable();
1549
1550                         rcu_momentary_dyntick_idle();
1551
1552                         if (!disable_irq)
1553                                 local_irq_enable();
1554                 }
1555
1556                 /*
1557                  * For the non-preemptive kernel config: let threads runs, if
1558                  * they so wish, unless set not do to so.
1559                  */
1560                 if (!disable_irq && !disable_preemption)
1561                         cond_resched();
1562
1563                 last_sample = sample;
1564                 last_int_count = int_count;
1565
1566         } while (total < runtime && !kthread_should_stop());
1567
1568         /*
1569          * Finish the above in the view for interrupts.
1570          */
1571         barrier();
1572
1573         osn_var->sampling = false;
1574
1575         /*
1576          * Make sure sampling data is no longer updated.
1577          */
1578         barrier();
1579
1580         /*
1581          * Return to the preemptive state.
1582          */
1583         if (disable_preemption)
1584                 preempt_enable();
1585
1586         if (disable_irq)
1587                 local_irq_enable();
1588
1589         /*
1590          * Save noise info.
1591          */
1592         s.noise = time_to_us(sum_noise);
1593         s.runtime = time_to_us(total);
1594         s.max_sample = time_to_us(max_noise);
1595         s.hw_count = hw_count;
1596
1597         /* Save interference stats info */
1598         diff_osn_sample_stats(osn_var, &s);
1599
1600         trace_osnoise_sample(&s);
1601
1602         notify_new_max_latency(max_noise);
1603
1604         if (osnoise_data.stop_tracing_total)
1605                 if (s.noise > osnoise_data.stop_tracing_total)
1606                         osnoise_stop_tracing();
1607
1608         return 0;
1609 out:
1610         return ret;
1611 }
1612
1613 static struct cpumask osnoise_cpumask;
1614 static struct cpumask save_cpumask;
1615
1616 /*
1617  * osnoise_sleep - sleep until the next period
1618  */
1619 static void osnoise_sleep(bool skip_period)
1620 {
1621         u64 interval;
1622         ktime_t wake_time;
1623
1624         mutex_lock(&interface_lock);
1625         if (skip_period)
1626                 interval = osnoise_data.sample_period;
1627         else
1628                 interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
1629         mutex_unlock(&interface_lock);
1630
1631         /*
1632          * differently from hwlat_detector, the osnoise tracer can run
1633          * without a pause because preemption is on.
1634          */
1635         if (!interval) {
1636                 /* Let synchronize_rcu_tasks() make progress */
1637                 cond_resched_tasks_rcu_qs();
1638                 return;
1639         }
1640
1641         wake_time = ktime_add_us(ktime_get(), interval);
1642         __set_current_state(TASK_INTERRUPTIBLE);
1643
1644         while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
1645                 if (kthread_should_stop())
1646                         break;
1647         }
1648 }
1649
1650 /*
1651  * osnoise_migration_pending - checks if the task needs to migrate
1652  *
1653  * osnoise/timerlat threads are per-cpu. If there is a pending request to
1654  * migrate the thread away from the current CPU, something bad has happened.
1655  * Play the good citizen and leave.
1656  *
1657  * Returns 0 if it is safe to continue, 1 otherwise.
1658  */
1659 static inline int osnoise_migration_pending(void)
1660 {
1661         if (!current->migration_pending)
1662                 return 0;
1663
1664         /*
1665          * If migration is pending, there is a task waiting for the
1666          * tracer to enable migration. The tracer does not allow migration,
1667          * thus: taint and leave to unblock the blocked thread.
1668          */
1669         osnoise_taint("migration requested to osnoise threads, leaving.");
1670
1671         /*
1672          * Unset this thread from the threads managed by the interface.
1673          * The tracers are responsible for cleaning their env before
1674          * exiting.
1675          */
1676         mutex_lock(&interface_lock);
1677         this_cpu_osn_var()->kthread = NULL;
1678         mutex_unlock(&interface_lock);
1679
1680         return 1;
1681 }
1682
1683 /*
1684  * osnoise_main - The osnoise detection kernel thread
1685  *
1686  * Calls run_osnoise() function to measure the osnoise for the configured runtime,
1687  * every period.
1688  */
1689 static int osnoise_main(void *data)
1690 {
1691         unsigned long flags;
1692
1693         /*
1694          * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1695          * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
1696          *
1697          * To work around this limitation, disable migration and remove the
1698          * flag.
1699          */
1700         migrate_disable();
1701         raw_spin_lock_irqsave(&current->pi_lock, flags);
1702         current->flags &= ~(PF_NO_SETAFFINITY);
1703         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1704
1705         while (!kthread_should_stop()) {
1706                 if (osnoise_migration_pending())
1707                         break;
1708
1709                 /* skip a period if tracing is off on all instances */
1710                 if (!osnoise_has_tracing_on()) {
1711                         osnoise_sleep(true);
1712                         continue;
1713                 }
1714
1715                 run_osnoise();
1716                 osnoise_sleep(false);
1717         }
1718
1719         migrate_enable();
1720         return 0;
1721 }
1722
1723 #ifdef CONFIG_TIMERLAT_TRACER
1724 /*
1725  * timerlat_irq - hrtimer handler for timerlat.
1726  */
1727 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
1728 {
1729         struct osnoise_variables *osn_var = this_cpu_osn_var();
1730         struct timerlat_variables *tlat;
1731         struct timerlat_sample s;
1732         u64 now;
1733         u64 diff;
1734
1735         /*
1736          * I am not sure if the timer was armed for this CPU. So, get
1737          * the timerlat struct from the timer itself, not from this
1738          * CPU.
1739          */
1740         tlat = container_of(timer, struct timerlat_variables, timer);
1741
1742         now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1743
1744         /*
1745          * Enable the osnoise: events for thread an softirq.
1746          */
1747         tlat->tracing_thread = true;
1748
1749         osn_var->thread.arrival_time = time_get();
1750
1751         /*
1752          * A hardirq is running: the timer IRQ. It is for sure preempting
1753          * a thread, and potentially preempting a softirq.
1754          *
1755          * At this point, it is not interesting to know the duration of the
1756          * preempted thread (and maybe softirq), but how much time they will
1757          * delay the beginning of the execution of the timer thread.
1758          *
1759          * To get the correct (net) delay added by the softirq, its delta_start
1760          * is set as the IRQ one. In this way, at the return of the IRQ, the delta
1761          * start of the sofitrq will be zeroed, accounting then only the time
1762          * after that.
1763          *
1764          * The thread follows the same principle. However, if a softirq is
1765          * running, the thread needs to receive the softirq delta_start. The
1766          * reason being is that the softirq will be the last to be unfolded,
1767          * resseting the thread delay to zero.
1768          *
1769          * The PREEMPT_RT is a special case, though. As softirqs run as threads
1770          * on RT, moving the thread is enough.
1771          */
1772         if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
1773                 copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1774                                    &osn_var->softirq.delta_start);
1775
1776                 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
1777                                     &osn_var->irq.delta_start);
1778         } else {
1779                 copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1780                                     &osn_var->irq.delta_start);
1781         }
1782
1783         /*
1784          * Compute the current time with the expected time.
1785          */
1786         diff = now - tlat->abs_period;
1787
1788         tlat->count++;
1789         s.seqnum = tlat->count;
1790         s.timer_latency = diff;
1791         s.context = IRQ_CONTEXT;
1792
1793         trace_timerlat_sample(&s);
1794
1795         if (osnoise_data.stop_tracing) {
1796                 if (time_to_us(diff) >= osnoise_data.stop_tracing) {
1797
1798                         /*
1799                          * At this point, if stop_tracing is set and <= print_stack,
1800                          * print_stack is set and would be printed in the thread handler.
1801                          *
1802                          * Thus, print the stack trace as it is helpful to define the
1803                          * root cause of an IRQ latency.
1804                          */
1805                         if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
1806                                 timerlat_save_stack(0);
1807                                 timerlat_dump_stack(time_to_us(diff));
1808                         }
1809
1810                         osnoise_stop_tracing();
1811                         notify_new_max_latency(diff);
1812
1813                         wake_up_process(tlat->kthread);
1814
1815                         return HRTIMER_NORESTART;
1816                 }
1817         }
1818
1819         wake_up_process(tlat->kthread);
1820
1821         if (osnoise_data.print_stack)
1822                 timerlat_save_stack(0);
1823
1824         return HRTIMER_NORESTART;
1825 }
1826
1827 /*
1828  * wait_next_period - Wait for the next period for timerlat
1829  */
1830 static int wait_next_period(struct timerlat_variables *tlat)
1831 {
1832         ktime_t next_abs_period, now;
1833         u64 rel_period = osnoise_data.timerlat_period * 1000;
1834
1835         now = hrtimer_cb_get_time(&tlat->timer);
1836         next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1837
1838         /*
1839          * Save the next abs_period.
1840          */
1841         tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1842
1843         /*
1844          * If the new abs_period is in the past, skip the activation.
1845          */
1846         while (ktime_compare(now, next_abs_period) > 0) {
1847                 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1848                 tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1849         }
1850
1851         set_current_state(TASK_INTERRUPTIBLE);
1852
1853         hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1854         schedule();
1855         return 1;
1856 }
1857
/*
 * timerlat_main - Body of the per-CPU timerlat workload kthread
 * @data: unused
 *
 * Sets the thread to SCHED_FIFO at DEFAULT_TIMERLAT_PRIO, initializes the
 * per-CPU hrtimer with timerlat_irq as callback, and then loops: after each
 * wakeup it computes the distance between the current time and the
 * abs_period the timer was armed for, emits a timerlat_sample with
 * THREAD_CONTEXT, and sleeps again via wait_next_period().
 *
 * The loop ends when kthread_should_stop() is true or when
 * osnoise_migration_pending() reports true. Always returns 0.
 */
static int timerlat_main(void *data)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        struct timerlat_variables *tlat = this_cpu_tmr_var();
        struct timerlat_sample s;
        struct sched_param sp;
        unsigned long flags;
        u64 now, diff;

        /*
         * Make the thread RT, that is how cyclictest is usually used.
         */
        sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
        sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);

        /*
         * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
         * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
         *
         * To work around this limitation, disable migration and remove the
         * flag.
         */
        migrate_disable();
        raw_spin_lock_irqsave(&current->pi_lock, flags);
        current->flags &= ~(PF_NO_SETAFFINITY);
        raw_spin_unlock_irqrestore(&current->pi_lock, flags);

        /* Start from a clean per-CPU state. */
        tlat->count = 0;
        tlat->tracing_thread = false;

        hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
        tlat->timer.function = timerlat_irq;
        tlat->kthread = current;
        osn_var->pid = current->pid;
        /*
         * Annotate the arrival time: the first period is counted from now.
         */
        tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

        wait_next_period(tlat);

        /* Sampling is flagged only after the first period has been waited for. */
        osn_var->sampling = 1;

        while (!kthread_should_stop()) {

                /* Latency: time elapsed since the expiration the timer was armed for. */
                now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
                diff = now - tlat->abs_period;

                s.seqnum = tlat->count;
                s.timer_latency = diff;
                s.context = THREAD_CONTEXT;

                trace_timerlat_sample(&s);

                notify_new_max_latency(diff);

                timerlat_dump_stack(time_to_us(diff));

                tlat->tracing_thread = false;
                /* A stop_tracing_total of zero disables the threshold check. */
                if (osnoise_data.stop_tracing_total)
                        if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
                                osnoise_stop_tracing();

                if (osnoise_migration_pending())
                        break;

                wait_next_period(tlat);
        }

        /* Undo the setup: make sure the timer is not left armed. */
        hrtimer_cancel(&tlat->timer);
        migrate_enable();
        return 0;
}
1934 #else /* CONFIG_TIMERLAT_TRACER */
/*
 * timerlat_main - Stub used when CONFIG_TIMERLAT_TRACER is not set
 * @data: unused
 *
 * Does nothing and returns 0.
 */
static int timerlat_main(void *data)
{
        return 0;
}
1939 #endif /* CONFIG_TIMERLAT_TRACER */
1940
1941 /*
1942  * stop_kthread - stop a workload thread
1943  */
1944 static void stop_kthread(unsigned int cpu)
1945 {
1946         struct task_struct *kthread;
1947
1948         kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
1949         if (kthread) {
1950                 if (test_bit(OSN_WORKLOAD, &osnoise_options)) {
1951                         kthread_stop(kthread);
1952                 } else {
1953                         /*
1954                          * This is a user thread waiting on the timerlat_fd. We need
1955                          * to close all users, and the best way to guarantee this is
1956                          * by killing the thread. NOTE: this is a purpose specific file.
1957                          */
1958                         kill_pid(kthread->thread_pid, SIGKILL, 1);
1959                         put_task_struct(kthread);
1960                 }
1961                 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1962         } else {
1963                 /* if no workload, just return */
1964                 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1965                         /*
1966                          * This is set in the osnoise tracer case.
1967                          */
1968                         per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1969                         barrier();
1970                         return;
1971                 }
1972         }
1973 }
1974
1975 /*
1976  * stop_per_cpu_kthread - Stop per-cpu threads
1977  *
1978  * Stop the osnoise sampling htread. Use this on unload and at system
1979  * shutdown.
1980  */
1981 static void stop_per_cpu_kthreads(void)
1982 {
1983         int cpu;
1984
1985         cpus_read_lock();
1986
1987         for_each_online_cpu(cpu)
1988                 stop_kthread(cpu);
1989
1990         cpus_read_unlock();
1991 }
1992
1993 /*
1994  * start_kthread - Start a workload tread
1995  */
1996 static int start_kthread(unsigned int cpu)
1997 {
1998         struct task_struct *kthread;
1999         void *main = osnoise_main;
2000         char comm[24];
2001
2002         if (timerlat_enabled()) {
2003                 snprintf(comm, 24, "timerlat/%d", cpu);
2004                 main = timerlat_main;
2005         } else {
2006                 /* if no workload, just return */
2007                 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2008                         per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
2009                         barrier();
2010                         return 0;
2011                 }
2012                 snprintf(comm, 24, "osnoise/%d", cpu);
2013         }
2014
2015         kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
2016
2017         if (IS_ERR(kthread)) {
2018                 pr_err(BANNER "could not start sampling thread\n");
2019                 stop_per_cpu_kthreads();
2020                 return -ENOMEM;
2021         }
2022
2023         per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2024
2025         return 0;
2026 }
2027
2028 /*
2029  * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads
2030  *
2031  * This starts the kernel thread that will look for osnoise on many
2032  * cpus.
2033  */
2034 static int start_per_cpu_kthreads(void)
2035 {
2036         struct cpumask *current_mask = &save_cpumask;
2037         int retval = 0;
2038         int cpu;
2039
2040         if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2041                 if (timerlat_enabled())
2042                         return 0;
2043         }
2044
2045         cpus_read_lock();
2046         /*
2047          * Run only on online CPUs in which osnoise is allowed to run.
2048          */
2049         cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
2050
2051         for_each_possible_cpu(cpu)
2052                 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
2053
2054         for_each_cpu(cpu, current_mask) {
2055                 retval = start_kthread(cpu);
2056                 if (retval) {
2057                         cpus_read_unlock();
2058                         stop_per_cpu_kthreads();
2059                         return retval;
2060                 }
2061         }
2062
2063         cpus_read_unlock();
2064
2065         return retval;
2066 }
2067
2068 #ifdef CONFIG_HOTPLUG_CPU
2069 static void osnoise_hotplug_workfn(struct work_struct *dummy)
2070 {
2071         unsigned int cpu = smp_processor_id();
2072
2073         mutex_lock(&trace_types_lock);
2074
2075         if (!osnoise_has_registered_instances())
2076                 goto out_unlock_trace;
2077
2078         mutex_lock(&interface_lock);
2079         cpus_read_lock();
2080
2081         if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
2082                 goto out_unlock;
2083
2084         start_kthread(cpu);
2085
2086 out_unlock:
2087         cpus_read_unlock();
2088         mutex_unlock(&interface_lock);
2089 out_unlock_trace:
2090         mutex_unlock(&trace_types_lock);
2091 }
2092
2093 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
2094
/*
 * osnoise_cpu_init - CPU hotplug online callback function
 * @cpu: the CPU coming online
 *
 * Queues osnoise_hotplug_work on @cpu; the work function is what actually
 * starts the workload. Always returns 0.
 */
static int osnoise_cpu_init(unsigned int cpu)
{
        schedule_work_on(cpu, &osnoise_hotplug_work);
        return 0;
}
2103
/*
 * osnoise_cpu_die - CPU hotplug offline callback function
 * @cpu: the CPU going offline
 *
 * Stops the workload registered for @cpu via stop_kthread().
 * Always returns 0.
 */
static int osnoise_cpu_die(unsigned int cpu)
{
        stop_kthread(cpu);
        return 0;
}
2112
2113 static void osnoise_init_hotplug_support(void)
2114 {
2115         int ret;
2116
2117         ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
2118                                 osnoise_cpu_init, osnoise_cpu_die);
2119         if (ret < 0)
2120                 pr_warn(BANNER "Error to init cpu hotplug support\n");
2121
2122         return;
2123 }
2124 #else /* CONFIG_HOTPLUG_CPU */
/*
 * osnoise_init_hotplug_support - Stub used when CONFIG_HOTPLUG_CPU is not set
 *
 * There are no hotplug callbacks to register, so this does nothing.
 */
static void osnoise_init_hotplug_support(void)
{
        return;
}
2129 #endif /* CONFIG_HOTPLUG_CPU */
2130
2131 /*
2132  * seq file functions for the osnoise/options file.
2133  */
2134 static void *s_options_start(struct seq_file *s, loff_t *pos)
2135 {
2136         int option = *pos;
2137
2138         mutex_lock(&interface_lock);
2139
2140         if (option >= OSN_MAX)
2141                 return NULL;
2142
2143         return pos;
2144 }
2145
2146 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
2147 {
2148         int option = ++(*pos);
2149
2150         if (option >= OSN_MAX)
2151                 return NULL;
2152
2153         return pos;
2154 }
2155
2156 static int s_options_show(struct seq_file *s, void *v)
2157 {
2158         loff_t *pos = v;
2159         int option = *pos;
2160
2161         if (option == OSN_DEFAULTS) {
2162                 if (osnoise_options == OSN_DEFAULT_OPTIONS)
2163                         seq_printf(s, "%s", osnoise_options_str[option]);
2164                 else
2165                         seq_printf(s, "NO_%s", osnoise_options_str[option]);
2166                 goto out;
2167         }
2168
2169         if (test_bit(option, &osnoise_options))
2170                 seq_printf(s, "%s", osnoise_options_str[option]);
2171         else
2172                 seq_printf(s, "NO_%s", osnoise_options_str[option]);
2173
2174 out:
2175         if (option != OSN_MAX)
2176                 seq_puts(s, " ");
2177
2178         return 0;
2179 }
2180
/*
 * s_options_stop - End an iteration over the options
 *
 * Terminates the output line and releases interface_lock, which was taken
 * in s_options_start().
 */
static void s_options_stop(struct seq_file *s, void *v)
{
        seq_puts(s, "\n");
        mutex_unlock(&interface_lock);
}
2186
/*
 * Iterator callbacks for the osnoise/options file; installed by
 * osnoise_options_open() through seq_open().
 */
static const struct seq_operations osnoise_options_seq_ops = {
        .start          = s_options_start,
        .next           = s_options_next,
        .show           = s_options_show,
        .stop           = s_options_stop
};
2193
2194 static int osnoise_options_open(struct inode *inode, struct file *file)
2195 {
2196         return seq_open(file, &osnoise_options_seq_ops);
2197 };
2198
2199 /**
2200  * osnoise_options_write - Write function for "options" entry
2201  * @filp: The active open file structure
2202  * @ubuf: The user buffer that contains the value to write
2203  * @cnt: The maximum number of bytes to write to "file"
2204  * @ppos: The current position in @file
2205  *
2206  * Writing the option name sets the option, writing the "NO_"
2207  * prefix in front of the option name disables it.
2208  *
2209  * Writing "DEFAULTS" resets the option values to the default ones.
2210  */
2211 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2212                                      size_t cnt, loff_t *ppos)
2213 {
2214         int running, option, enable, retval;
2215         char buf[256], *option_str;
2216
2217         if (cnt >= 256)
2218                 return -EINVAL;
2219
2220         if (copy_from_user(buf, ubuf, cnt))
2221                 return -EFAULT;
2222
2223         buf[cnt] = 0;
2224
2225         if (strncmp(buf, "NO_", 3)) {
2226                 option_str = strstrip(buf);
2227                 enable = true;
2228         } else {
2229                 option_str = strstrip(&buf[3]);
2230                 enable = false;
2231         }
2232
2233         option = match_string(osnoise_options_str, OSN_MAX, option_str);
2234         if (option < 0)
2235                 return -EINVAL;
2236
2237         /*
2238          * trace_types_lock is taken to avoid concurrency on start/stop.
2239          */
2240         mutex_lock(&trace_types_lock);
2241         running = osnoise_has_registered_instances();
2242         if (running)
2243                 stop_per_cpu_kthreads();
2244
2245         mutex_lock(&interface_lock);
2246         /*
2247          * avoid CPU hotplug operations that might read options.
2248          */
2249         cpus_read_lock();
2250
2251         retval = cnt;
2252
2253         if (enable) {
2254                 if (option == OSN_DEFAULTS)
2255                         osnoise_options = OSN_DEFAULT_OPTIONS;
2256                 else
2257                         set_bit(option, &osnoise_options);
2258         } else {
2259                 if (option == OSN_DEFAULTS)
2260                         retval = -EINVAL;
2261                 else
2262                         clear_bit(option, &osnoise_options);
2263         }
2264
2265         cpus_read_unlock();
2266         mutex_unlock(&interface_lock);
2267
2268         if (running)
2269                 start_per_cpu_kthreads();
2270         mutex_unlock(&trace_types_lock);
2271
2272         return retval;
2273 }
2274
2275 /*
2276  * osnoise_cpus_read - Read function for reading the "cpus" file
2277  * @filp: The active open file structure
2278  * @ubuf: The userspace provided buffer to read value into
2279  * @cnt: The maximum number of bytes to read
2280  * @ppos: The current "file" position
2281  *
2282  * Prints the "cpus" output into the user-provided buffer.
2283  */
2284 static ssize_t
2285 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2286                   loff_t *ppos)
2287 {
2288         char *mask_str;
2289         int len;
2290
2291         mutex_lock(&interface_lock);
2292
2293         len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2294         mask_str = kmalloc(len, GFP_KERNEL);
2295         if (!mask_str) {
2296                 count = -ENOMEM;
2297                 goto out_unlock;
2298         }
2299
2300         len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2301         if (len >= count) {
2302                 count = -EINVAL;
2303                 goto out_free;
2304         }
2305
2306         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2307
2308 out_free:
2309         kfree(mask_str);
2310 out_unlock:
2311         mutex_unlock(&interface_lock);
2312
2313         return count;
2314 }
2315
2316 /*
2317  * osnoise_cpus_write - Write function for "cpus" entry
2318  * @filp: The active open file structure
2319  * @ubuf: The user buffer that contains the value to write
2320  * @cnt: The maximum number of bytes to write to "file"
2321  * @ppos: The current position in @file
2322  *
2323  * This function provides a write implementation for the "cpus"
2324  * interface to the osnoise trace. By default, it lists all  CPUs,
2325  * in this way, allowing osnoise threads to run on any online CPU
2326  * of the system. It serves to restrict the execution of osnoise to the
2327  * set of CPUs writing via this interface. Why not use "tracing_cpumask"?
2328  * Because the user might be interested in tracing what is running on
2329  * other CPUs. For instance, one might run osnoise in one HT CPU
2330  * while observing what is running on the sibling HT CPU.
2331  */
2332 static ssize_t
2333 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2334                    loff_t *ppos)
2335 {
2336         cpumask_var_t osnoise_cpumask_new;
2337         int running, err;
2338         char buf[256];
2339
2340         if (count >= 256)
2341                 return -EINVAL;
2342
2343         if (copy_from_user(buf, ubuf, count))
2344                 return -EFAULT;
2345
2346         if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2347                 return -ENOMEM;
2348
2349         err = cpulist_parse(buf, osnoise_cpumask_new);
2350         if (err)
2351                 goto err_free;
2352
2353         /*
2354          * trace_types_lock is taken to avoid concurrency on start/stop.
2355          */
2356         mutex_lock(&trace_types_lock);
2357         running = osnoise_has_registered_instances();
2358         if (running)
2359                 stop_per_cpu_kthreads();
2360
2361         mutex_lock(&interface_lock);
2362         /*
2363          * osnoise_cpumask is read by CPU hotplug operations.
2364          */
2365         cpus_read_lock();
2366
2367         cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2368
2369         cpus_read_unlock();
2370         mutex_unlock(&interface_lock);
2371
2372         if (running)
2373                 start_per_cpu_kthreads();
2374         mutex_unlock(&trace_types_lock);
2375
2376         free_cpumask_var(osnoise_cpumask_new);
2377         return count;
2378
2379 err_free:
2380         free_cpumask_var(osnoise_cpumask_new);
2381
2382         return err;
2383 }
2384
2385 #ifdef CONFIG_TIMERLAT_TRACER
2386 static int timerlat_fd_open(struct inode *inode, struct file *file)
2387 {
2388         struct osnoise_variables *osn_var;
2389         struct timerlat_variables *tlat;
2390         long cpu = (long) inode->i_cdev;
2391
2392         mutex_lock(&interface_lock);
2393
2394         /*
2395          * This file is accessible only if timerlat is enabled, and
2396          * NO_OSNOISE_WORKLOAD is set.
2397          */
2398         if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
2399                 mutex_unlock(&interface_lock);
2400                 return -EINVAL;
2401         }
2402
2403         migrate_disable();
2404
2405         osn_var = this_cpu_osn_var();
2406
2407         /*
2408          * The osn_var->pid holds the single access to this file.
2409          */
2410         if (osn_var->pid) {
2411                 mutex_unlock(&interface_lock);
2412                 migrate_enable();
2413                 return -EBUSY;
2414         }
2415
2416         /*
2417          * timerlat tracer is a per-cpu tracer. Check if the user-space too
2418          * is pinned to a single CPU. The tracer laters monitor if the task
2419          * migrates and then disables tracer if it does. However, it is
2420          * worth doing this basic acceptance test to avoid obviusly wrong
2421          * setup.
2422          */
2423         if (current->nr_cpus_allowed > 1 ||  cpu != smp_processor_id()) {
2424                 mutex_unlock(&interface_lock);
2425                 migrate_enable();
2426                 return -EPERM;
2427         }
2428
2429         /*
2430          * From now on, it is good to go.
2431          */
2432         file->private_data = inode->i_cdev;
2433
2434         get_task_struct(current);
2435
2436         osn_var->kthread = current;
2437         osn_var->pid = current->pid;
2438
2439         /*
2440          * Setup is done.
2441          */
2442         mutex_unlock(&interface_lock);
2443
2444         tlat = this_cpu_tmr_var();
2445         tlat->count = 0;
2446
2447         migrate_enable();
2448         return 0;
2449 };
2450
/*
 * timerlat_fd_read - Read function for "timerlat_fd" file
 * @file: The active open file structure
 * @ubuf: The userspace provided buffer (not written by this function)
 * @count: The maximum number of bytes to read (unused)
 * @ppos: The current "file" position (unused)
 *
 * Each call is one timerlat cycle for the user-space thread: it reports the
 * return from user-space of the previous activation (THREAD_URET), sleeps
 * until the next period, and reports the wakeup (THREAD_CONTEXT). On the
 * first call for this CPU it sets up the timer instead of reporting.
 *
 * Returns 0 after the wakeup, or -EINVAL if the thread is running on a CPU
 * other than the one the file belongs to, or was flagged as migrated before.
 */
static ssize_t
timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
                  loff_t *ppos)
{
        /* The CPU number was stored in private_data at open time. */
        long cpu = (long) file->private_data;
        struct osnoise_variables *osn_var;
        struct timerlat_variables *tlat;
        struct timerlat_sample s;
        s64 diff;
        u64 now;

        migrate_disable();

        tlat = this_cpu_tmr_var();

        /*
         * While in user-space, the thread is migratable. There is nothing
         * we can do about it.
         * So, if the thread is running on another CPU, stop the machinery.
         */
        if (cpu == smp_processor_id()) {
                if (tlat->uthread_migrate) {
                        migrate_enable();
                        return -EINVAL;
                }
        } else {
                /* Flag the migration on the CPU the file belongs to. */
                per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
                osnoise_taint("timerlat user thread migrate\n");
                osnoise_stop_tracing();
                migrate_enable();
                return -EINVAL;
        }

        osn_var = this_cpu_osn_var();

        /*
         * The timerlat in user-space runs in a different order:
         * the read() starts from the execution of the previous occurrence,
         * sleeping for the next occurrence.
         *
         * So, skip if we are entering on read() before the first wakeup
         * from timerlat IRQ:
         */
        if (likely(osn_var->sampling)) {
                now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
                diff = now - tlat->abs_period;

                /*
                 * it was not a timer firing, but some other signal?
                 */
                if (diff < 0)
                        goto out;

                s.seqnum = tlat->count;
                s.timer_latency = diff;
                s.context = THREAD_URET;

                trace_timerlat_sample(&s);

                notify_new_max_latency(diff);

                tlat->tracing_thread = false;
                /* A stop_tracing_total of zero disables the threshold check. */
                if (osnoise_data.stop_tracing_total)
                        if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
                                osnoise_stop_tracing();
        } else {
                /* First read(): set up the timer, there is nothing to report yet. */
                tlat->tracing_thread = false;
                tlat->kthread = current;

                hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
                tlat->timer.function = timerlat_irq;

                /* Annotate now to drift new period */
                tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);

                osn_var->sampling = 1;
        }

        /* wait for the next period */
        wait_next_period(tlat);

        /* This is the wakeup from this cycle */
        now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
        diff = now - tlat->abs_period;

        /*
         * it was not a timer firing, but some other signal?
         */
        if (diff < 0)
                goto out;

        s.seqnum = tlat->count;
        s.timer_latency = diff;
        s.context = THREAD_CONTEXT;

        trace_timerlat_sample(&s);

        /* Here the stack dump and max-latency notification happen only past the threshold. */
        if (osnoise_data.stop_tracing_total) {
                if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
                        timerlat_dump_stack(time_to_us(diff));
                        notify_new_max_latency(diff);
                        osnoise_stop_tracing();
                }
        }

out:
        migrate_enable();
        return 0;
}
2569
2570 static int timerlat_fd_release(struct inode *inode, struct file *file)
2571 {
2572         struct osnoise_variables *osn_var;
2573         struct timerlat_variables *tlat_var;
2574         long cpu = (long) file->private_data;
2575
2576         migrate_disable();
2577         mutex_lock(&interface_lock);
2578
2579         osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
2580         tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
2581
2582         hrtimer_cancel(&tlat_var->timer);
2583         memset(tlat_var, 0, sizeof(*tlat_var));
2584
2585         osn_var->sampling = 0;
2586         osn_var->pid = 0;
2587
2588         /*
2589          * We are leaving, not being stopped... see stop_kthread();
2590          */
2591         if (osn_var->kthread) {
2592                 put_task_struct(osn_var->kthread);
2593                 osn_var->kthread = NULL;
2594         }
2595
2596         mutex_unlock(&interface_lock);
2597         migrate_enable();
2598         return 0;
2599 }
2600 #endif
2601
/*
 * osnoise/runtime_us: cannot be greater than the period.
 *
 * The upper bound points at the current sample_period value; there is no
 * lower bound. All the parameters below use interface_lock as their lock.
 */
static struct trace_min_max_param osnoise_runtime = {
        .lock   = &interface_lock,
        .val    = &osnoise_data.sample_runtime,
        .max    = &osnoise_data.sample_period,
        .min    = NULL,
};

/*
 * osnoise/period_us: cannot be smaller than the runtime.
 *
 * The lower bound points at the current sample_runtime value; there is no
 * upper bound.
 */
static struct trace_min_max_param osnoise_period = {
        .lock   = &interface_lock,
        .val    = &osnoise_data.sample_period,
        .max    = NULL,
        .min    = &osnoise_data.sample_runtime,
};

/*
 * osnoise/stop_tracing_us: no limit.
 */
static struct trace_min_max_param osnoise_stop_tracing_in = {
        .lock   = &interface_lock,
        .val    = &osnoise_data.stop_tracing,
        .max    = NULL,
        .min    = NULL,
};

/*
 * osnoise/stop_tracing_total_us: no limit.
 */
static struct trace_min_max_param osnoise_stop_tracing_total = {
        .lock   = &interface_lock,
        .val    = &osnoise_data.stop_tracing_total,
        .max    = NULL,
        .min    = NULL,
};
2641
2642 #ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
 * latency is higher than val. No limit on the value.
 */
static struct trace_min_max_param osnoise_print_stack = {
        .lock   = &interface_lock,
        .val    = &osnoise_data.print_stack,
        .max    = NULL,
        .min    = NULL,
};

/*
 * osnoise/timerlat_period: min 100 us, max 1 s
 *
 * The bounds are kept in the two variables below, in microseconds.
 */
static u64 timerlat_min_period = 100;
static u64 timerlat_max_period = 1000000;
static struct trace_min_max_param timerlat_period = {
        .lock   = &interface_lock,
        .val    = &osnoise_data.timerlat_period,
        .max    = &timerlat_max_period,
        .min    = &timerlat_min_period,
};
2665
/*
 * File operations of the per-cpu "timerlat_fd" file used by the timerlat
 * user-space thread. There is no write operation.
 */
static const struct file_operations timerlat_fd_fops = {
        .open           = timerlat_fd_open,
        .read           = timerlat_fd_read,
        .release        = timerlat_fd_release,
        .llseek         = generic_file_llseek,
};
2672 #endif
2673
/*
 * File operations of the "cpus" entry: reads print osnoise_cpumask, writes
 * replace it.
 */
static const struct file_operations cpus_fops = {
        .open           = tracing_open_generic,
        .read           = osnoise_cpus_read,
        .write          = osnoise_cpus_write,
        .llseek         = generic_file_llseek,
};
2680
/*
 * File operations of the "options" entry: reads go through the seq_file
 * iterator set up in osnoise_options_open(), writes set or clear an option.
 */
static const struct file_operations osnoise_options_fops = {
        .open           = osnoise_options_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
        .write          = osnoise_options_write
};
2688
2689 #ifdef CONFIG_TIMERLAT_TRACER
2690 #ifdef CONFIG_STACKTRACE
2691 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2692 {
2693         struct dentry *tmp;
2694
2695         tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2696                                   &osnoise_print_stack, &trace_min_max_fops);
2697         if (!tmp)
2698                 return -ENOMEM;
2699
2700         return 0;
2701 }
2702 #else /* CONFIG_STACKTRACE */
/*
 * init_timerlat_stack_tracefs - Stub used when CONFIG_STACKTRACE is not set
 * @top_dir: unused
 *
 * No "print_stack" entry is created. Always returns 0.
 */
static int init_timerlat_stack_tracefs(struct dentry *top_dir)
{
        return 0;
}
2707 #endif /* CONFIG_STACKTRACE */
2708
2709 static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
2710 {
2711         struct dentry *timerlat_fd;
2712         struct dentry *per_cpu;
2713         struct dentry *cpu_dir;
2714         char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
2715         long cpu;
2716
2717         /*
2718          * Why not using tracing instance per_cpu/ dir?
2719          *
2720          * Because osnoise/timerlat have a single workload, having
2721          * multiple files like these are wast of memory.
2722          */
2723         per_cpu = tracefs_create_dir("per_cpu", top_dir);
2724         if (!per_cpu)
2725                 return -ENOMEM;
2726
2727         for_each_possible_cpu(cpu) {
2728                 snprintf(cpu_str, 30, "cpu%ld", cpu);
2729                 cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
2730                 if (!cpu_dir)
2731                         goto out_clean;
2732
2733                 timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
2734                                                 cpu_dir, NULL, &timerlat_fd_fops);
2735                 if (!timerlat_fd)
2736                         goto out_clean;
2737
2738                 /* Record the CPU */
2739                 d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
2740         }
2741
2742         return 0;
2743
2744 out_clean:
2745         tracefs_remove(per_cpu);
2746         return -ENOMEM;
2747 }
2748
2749 /*
2750  * init_timerlat_tracefs - A function to initialize the timerlat interface files
2751  */
2752 static int init_timerlat_tracefs(struct dentry *top_dir)
2753 {
2754         struct dentry *tmp;
2755         int retval;
2756
2757         tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2758                                   &timerlat_period, &trace_min_max_fops);
2759         if (!tmp)
2760                 return -ENOMEM;
2761
2762         retval = osnoise_create_cpu_timerlat_fd(top_dir);
2763         if (retval)
2764                 return retval;
2765
2766         return init_timerlat_stack_tracefs(top_dir);
2767 }
2768 #else /* CONFIG_TIMERLAT_TRACER */
/* Without CONFIG_TIMERLAT_TRACER there are no timerlat files to create. */
static int init_timerlat_tracefs(struct dentry *top_dir)
{
        return 0;
}
2773 #endif /* CONFIG_TIMERLAT_TRACER */
2774
2775 /*
2776  * init_tracefs - A function to initialize the tracefs interface files
2777  *
2778  * This function creates entries in tracefs for "osnoise" and "timerlat".
2779  * It creates these directories in the tracing directory, and within that
2780  * directory the use can change and view the configs.
2781  */
2782 static int init_tracefs(void)
2783 {
2784         struct dentry *top_dir;
2785         struct dentry *tmp;
2786         int ret;
2787
2788         ret = tracing_init_dentry();
2789         if (ret)
2790                 return -ENOMEM;
2791
2792         top_dir = tracefs_create_dir("osnoise", NULL);
2793         if (!top_dir)
2794                 return 0;
2795
2796         tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2797                                   &osnoise_period, &trace_min_max_fops);
2798         if (!tmp)
2799                 goto err;
2800
2801         tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2802                                   &osnoise_runtime, &trace_min_max_fops);
2803         if (!tmp)
2804                 goto err;
2805
2806         tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2807                                   &osnoise_stop_tracing_in, &trace_min_max_fops);
2808         if (!tmp)
2809                 goto err;
2810
2811         tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2812                                   &osnoise_stop_tracing_total, &trace_min_max_fops);
2813         if (!tmp)
2814                 goto err;
2815
2816         tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2817         if (!tmp)
2818                 goto err;
2819
2820         tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2821                                 &osnoise_options_fops);
2822         if (!tmp)
2823                 goto err;
2824
2825         ret = init_timerlat_tracefs(top_dir);
2826         if (ret)
2827                 goto err;
2828
2829         return 0;
2830
2831 err:
2832         tracefs_remove(top_dir);
2833         return -ENOMEM;
2834 }
2835
2836 static int osnoise_hook_events(void)
2837 {
2838         int retval;
2839
2840         /*
2841          * Trace is already hooked, we are re-enabling from
2842          * a stop_tracing_*.
2843          */
2844         if (trace_osnoise_callback_enabled)
2845                 return 0;
2846
2847         retval = hook_irq_events();
2848         if (retval)
2849                 return -EINVAL;
2850
2851         retval = hook_softirq_events();
2852         if (retval)
2853                 goto out_unhook_irq;
2854
2855         retval = hook_thread_events();
2856         /*
2857          * All fine!
2858          */
2859         if (!retval)
2860                 return 0;
2861
2862         unhook_softirq_events();
2863 out_unhook_irq:
2864         unhook_irq_events();
2865         return -EINVAL;
2866 }
2867
/*
 * osnoise_unhook_events - remove the thread, softirq and IRQ hooks, i.e.
 * the reverse of the order used by osnoise_hook_events().
 */
static void osnoise_unhook_events(void)
{
        unhook_thread_events();
        unhook_softirq_events();
        unhook_irq_events();
}
2874
2875 /*
2876  * osnoise_workload_start - start the workload and hook to events
2877  */
2878 static int osnoise_workload_start(void)
2879 {
2880         int retval;
2881
2882         /*
2883          * Instances need to be registered after calling workload
2884          * start. Hence, if there is already an instance, the
2885          * workload was already registered. Otherwise, this
2886          * code is on the way to register the first instance,
2887          * and the workload will start.
2888          */
2889         if (osnoise_has_registered_instances())
2890                 return 0;
2891
2892         osn_var_reset_all();
2893
2894         retval = osnoise_hook_events();
2895         if (retval)
2896                 return retval;
2897
2898         /*
2899          * Make sure that ftrace_nmi_enter/exit() see reset values
2900          * before enabling trace_osnoise_callback_enabled.
2901          */
2902         barrier();
2903         trace_osnoise_callback_enabled = true;
2904
2905         retval = start_per_cpu_kthreads();
2906         if (retval) {
2907                 trace_osnoise_callback_enabled = false;
2908                 /*
2909                  * Make sure that ftrace_nmi_enter/exit() see
2910                  * trace_osnoise_callback_enabled as false before continuing.
2911                  */
2912                 barrier();
2913
2914                 osnoise_unhook_events();
2915                 return retval;
2916         }
2917
2918         return 0;
2919 }
2920
2921 /*
2922  * osnoise_workload_stop - stop the workload and unhook the events
2923  */
2924 static void osnoise_workload_stop(void)
2925 {
2926         /*
2927          * Instances need to be unregistered before calling
2928          * stop. Hence, if there is a registered instance, more
2929          * than one instance is running, and the workload will not
2930          * yet stop. Otherwise, this code is on the way to disable
2931          * the last instance, and the workload can stop.
2932          */
2933         if (osnoise_has_registered_instances())
2934                 return;
2935
2936         /*
2937          * If callbacks were already disabled in a previous stop
2938          * call, there is no need to disable then again.
2939          *
2940          * For instance, this happens when tracing is stopped via:
2941          * echo 0 > tracing_on
2942          * echo nop > current_tracer.
2943          */
2944         if (!trace_osnoise_callback_enabled)
2945                 return;
2946
2947         trace_osnoise_callback_enabled = false;
2948         /*
2949          * Make sure that ftrace_nmi_enter/exit() see
2950          * trace_osnoise_callback_enabled as false before continuing.
2951          */
2952         barrier();
2953
2954         stop_per_cpu_kthreads();
2955
2956         osnoise_unhook_events();
2957 }
2958
2959 static void osnoise_tracer_start(struct trace_array *tr)
2960 {
2961         int retval;
2962
2963         /*
2964          * If the instance is already registered, there is no need to
2965          * register it again.
2966          */
2967         if (osnoise_instance_registered(tr))
2968                 return;
2969
2970         retval = osnoise_workload_start();
2971         if (retval)
2972                 pr_err(BANNER "Error starting osnoise tracer\n");
2973
2974         osnoise_register_instance(tr);
2975 }
2976
/*
 * osnoise_tracer_stop - unregister @tr, then ask the workload to stop.
 *
 * The instance is dropped first because osnoise_workload_stop() does
 * nothing while any instance is still registered.
 */
static void osnoise_tracer_stop(struct trace_array *tr)
{
        osnoise_unregister_instance(tr);
        osnoise_workload_stop();
}
2982
2983 static int osnoise_tracer_init(struct trace_array *tr)
2984 {
2985         /*
2986          * Only allow osnoise tracer if timerlat tracer is not running
2987          * already.
2988          */
2989         if (timerlat_enabled())
2990                 return -EBUSY;
2991
2992         tr->max_latency = 0;
2993
2994         osnoise_tracer_start(tr);
2995         return 0;
2996 }
2997
/* osnoise_tracer_reset - tracer ->reset callback; simply stops @tr. */
static void osnoise_tracer_reset(struct trace_array *tr)
{
        osnoise_tracer_stop(tr);
}
3002
3003 static struct tracer osnoise_tracer __read_mostly = {
3004         .name           = "osnoise",
3005         .init           = osnoise_tracer_init,
3006         .reset          = osnoise_tracer_reset,
3007         .start          = osnoise_tracer_start,
3008         .stop           = osnoise_tracer_stop,
3009         .print_header   = print_osnoise_headers,
3010         .allow_instances = true,
3011 };
3012
3013 #ifdef CONFIG_TIMERLAT_TRACER
3014 static void timerlat_tracer_start(struct trace_array *tr)
3015 {
3016         int retval;
3017
3018         /*
3019          * If the instance is already registered, there is no need to
3020          * register it again.
3021          */
3022         if (osnoise_instance_registered(tr))
3023                 return;
3024
3025         retval = osnoise_workload_start();
3026         if (retval)
3027                 pr_err(BANNER "Error starting timerlat tracer\n");
3028
3029         osnoise_register_instance(tr);
3030
3031         return;
3032 }
3033
3034 static void timerlat_tracer_stop(struct trace_array *tr)
3035 {
3036         int cpu;
3037
3038         osnoise_unregister_instance(tr);
3039
3040         /*
3041          * Instruct the threads to stop only if this is the last instance.
3042          */
3043         if (!osnoise_has_registered_instances()) {
3044                 for_each_online_cpu(cpu)
3045                         per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
3046         }
3047
3048         osnoise_workload_stop();
3049 }
3050
3051 static int timerlat_tracer_init(struct trace_array *tr)
3052 {
3053         /*
3054          * Only allow timerlat tracer if osnoise tracer is not running already.
3055          */
3056         if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
3057                 return -EBUSY;
3058
3059         /*
3060          * If this is the first instance, set timerlat_tracer to block
3061          * osnoise tracer start.
3062          */
3063         if (!osnoise_has_registered_instances())
3064                 osnoise_data.timerlat_tracer = 1;
3065
3066         tr->max_latency = 0;
3067         timerlat_tracer_start(tr);
3068
3069         return 0;
3070 }
3071
3072 static void timerlat_tracer_reset(struct trace_array *tr)
3073 {
3074         timerlat_tracer_stop(tr);
3075
3076         /*
3077          * If this is the last instance, reset timerlat_tracer allowing
3078          * osnoise to be started.
3079          */
3080         if (!osnoise_has_registered_instances())
3081                 osnoise_data.timerlat_tracer = 0;
3082 }
3083
3084 static struct tracer timerlat_tracer __read_mostly = {
3085         .name           = "timerlat",
3086         .init           = timerlat_tracer_init,
3087         .reset          = timerlat_tracer_reset,
3088         .start          = timerlat_tracer_start,
3089         .stop           = timerlat_tracer_stop,
3090         .print_header   = print_timerlat_headers,
3091         .allow_instances = true,
3092 };
3093
3094 __init static int init_timerlat_tracer(void)
3095 {
3096         return register_tracer(&timerlat_tracer);
3097 }
3098 #else /* CONFIG_TIMERLAT_TRACER */
3099 __init static int init_timerlat_tracer(void)
3100 {
3101         return 0;
3102 }
3103 #endif /* CONFIG_TIMERLAT_TRACER */
3104
3105 __init static int init_osnoise_tracer(void)
3106 {
3107         int ret;
3108
3109         mutex_init(&interface_lock);
3110
3111         cpumask_copy(&osnoise_cpumask, cpu_all_mask);
3112
3113         ret = register_tracer(&osnoise_tracer);
3114         if (ret) {
3115                 pr_err(BANNER "Error registering osnoise!\n");
3116                 return ret;
3117         }
3118
3119         ret = init_timerlat_tracer();
3120         if (ret) {
3121                 pr_err(BANNER "Error registering timerlat!\n");
3122                 return ret;
3123         }
3124
3125         osnoise_init_hotplug_support();
3126
3127         INIT_LIST_HEAD_RCU(&osnoise_instances);
3128
3129         init_tracefs();
3130
3131         return 0;
3132 }
3133 late_initcall(init_osnoise_tracer);