1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 2019, Google LLC.
8 #include <linux/debug_locks.h>
9 #include <linux/delay.h>
10 #include <linux/jiffies.h>
11 #include <linux/kernel.h>
12 #include <linux/lockdep.h>
13 #include <linux/preempt.h>
14 #include <linux/printk.h>
15 #include <linux/sched.h>
16 #include <linux/spinlock.h>
17 #include <linux/stacktrace.h>
/* Capacity, in entries, of each stack_entries[] buffer handed to stack_trace_save() in this file. */
23 * Max. number of stack entries to show in the report.
25 #define NUM_STACK_ENTRIES 64
/*
 * Describes one memory access. Besides the ptr member shown here, code in
 * this file also reads the members size, access_type, task_pid and cpu_id
 * through this type.
 */
27 /* Common access info. */
/* Printed in reports as the address of the access ("... to 0x%px of ..."). */
29 const volatile void *ptr;
/*
 * Hand-off record between two racing threads: it carries one thread's access
 * info, that thread's saved stack trace and, optionally, its task_struct.
 */
37 * Other thread info: communicated from other racing thread to thread that set
38 * up the watchpoint, which then prints the complete report atomically.
/* ai.size == 0 marks the record as unused; release_report() resets it to 0. */
41 struct access_info ai;
/* Filled by stack_trace_save() in prepare_report_producer(). */
42 unsigned long stack_entries[NUM_STACK_ENTRIES];
43 int num_stack_entries;
46 * Optionally pass @current. Typically we do not need to pass @current
47 * via @other_info since just @task_pid is sufficient. Passing @current
48 * has additional overhead.
50 * To safely pass @current, we must either use get_task_struct/
51 * put_task_struct, or stall the thread that populated @other_info.
53 * We cannot rely on get_task_struct/put_task_struct in case
54 * release_report() races with a task being released, and would have to
55 * free it in release_report(). This may result in deadlock if we want
56 * to use KCSAN on the allocators.
58 * Since we also want to reliably print held locks for
59 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
60 * that populated @other_info until it has been consumed.
/*
 * Set to current by set_other_info_task_blocking(), which resets it to NULL
 * on its abort path; print_report() hands it to print_verbose_info().
 */
62 struct task_struct *task;
/*
 * kcsan_report() indexes this array with its watchpoint_idx argument and
 * range-checks that index against ARRAY_SIZE(other_infos).
 * NOTE(review): NUM_SLOTS is not defined in the visible part of this file.
 */
66 * To never block any producers of struct other_info, we need as many elements
67 * as we have watchpoints (upper bound on concurrent races to report).
69 static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
72 * Information about reported races; used to rate limit reporting.
76 * The last time the race was reported.
81 * The frames of the 2 threads; if only 1 thread is known, one frame
/*
 * REPORT_TIMES_MAX is the number of struct report_time entries that fit in
 * PAGE_SIZE bytes; REPORT_TIMES_SIZE is the length of report_times[], the
 * table scanned and updated by rate_limit_report().
 * NOTE(review): one arm of the REPORT_TIMES_SIZE conditional is not visible
 * in this listing; it presumably caps CONFIG_KCSAN_REPORT_ONCE_IN_MS at
 * REPORT_TIMES_MAX -- confirm.
 */
89 * Since we also want to be able to debug allocators with KCSAN, to avoid
90 * deadlock, report_times cannot be dynamically resized with krealloc in
93 * Therefore, we use a fixed-size array, which at most will occupy a page. This
94 * still adequately rate limits reports, assuming that a) number of unique data
95 * races is not excessive, and b) occurrence of unique races within the
96 * same time window is limited.
98 #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
99 #define REPORT_TIMES_SIZE \
100 (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \
102 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
103 static struct report_time report_times[REPORT_TIMES_SIZE];
/*
 * Acquired with raw_spin_lock_irqsave() by prepare_report() and its producer
 * and consumer helpers; dropped with raw_spin_unlock_irqrestore() in
 * release_report() and at the end of prepare_report_producer().
 */
106 * Spinlock serializing report generation, and access to @other_infos. Although
107 * it could make sense to have a finer-grained locking story for @other_infos,
108 * report generation needs to be serialized either way, so not much is gained.
110 static DEFINE_RAW_SPINLOCK(report_lock);
/*
 * rate_limit_report() - look up a race in report_times[] and record it.
 * @frame1: frame identifying one racing thread.
 * @frame2: frame identifying the other racing thread; print_report() passes 0
 *          when it has no stack for another thread.
 *
 * A stored entry matches when it holds the same two frames in either order.
 * The code after the scan overwrites the entry selected in use_entry with the
 * current jiffies value and the two frames.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably true means "reported recently, suppress this report" -- confirm.
 */
113 * Checks if the race identified by thread frames frame1 and frame2 has
114 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
116 static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
118 struct report_time *use_entry = &report_times[0];
119 unsigned long invalid_before;
/* Compile-time check: rate limiting enabled implies a non-empty table. */
122 BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
124 if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
/* Entries whose time lies before this jiffies value count as expired. */
127 invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS);
129 /* Check if a matching race report exists. */
130 for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
131 struct report_time *rt = &report_times[i];
134 * Must always select an entry for use to store info as we
135 * cannot resize report_times; at the end of the scan, use_entry
136 * will be the oldest entry, which ideally also happened before
137 * KCSAN_REPORT_ONCE_IN_MS ago.
139 if (time_before(rt->time, use_entry->time))
143 * Initially, no need to check any further as this entry as well
144 * as following entries have never been used.
149 /* Check if entry expired. */
150 if (time_before(rt->time, invalid_before))
151 continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
153 /* Reported recently, check if race matches. */
/* The pair is unordered: (frame1, frame2) and (frame2, frame1) both match. */
154 if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
155 (rt->frame1 == frame2 && rt->frame2 == frame1))
/* Record this race in the selected entry. */
159 use_entry->time = jiffies;
160 use_entry->frame1 = frame1;
161 use_entry->frame2 = frame2;
/*
 * skip_report() - apply the report filter rules to one thread's top frame.
 * @value_change: value-change verdict for the access; KCSAN_VALUE_CHANGE_FALSE
 *                is unexpected here and triggers a one-time warning.
 * @top_frame: address of the frame to filter on; it is symbolized with "%ps".
 *
 * With CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY enabled and a verdict of
 * KCSAN_VALUE_CHANGE_MAYBE, the symbol name is searched for "rcu_", "_rcu"
 * and "_srcu". The final visible statement defers to
 * kcsan_skip_report_debugfs().
 *
 * NOTE(review): the statement executed when none of the three substrings is
 * found is not visible in this listing; presumably the report is skipped --
 * confirm.
 */
166 * Special rules to skip reporting.
169 skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
171 /* Should never get here if value_change==FALSE. */
172 WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE);
175 * The first call to skip_report always has value_change==TRUE, since we
176 * cannot know the value written of an instrumented access. For the 2nd
177 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
179 * 1. read watchpoint, conflicting write (value_change==TRUE): report;
180 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
181 * 3. write watchpoint, conflicting write (value_change==TRUE): report;
182 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
183 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
184 * 6. write watchpoint, conflicting read (value_change==TRUE): report;
186 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
187 * data races where the write may have rewritten the same value; case 6
188 * is possible either if the size is larger than what we check value
189 * changes for or the access type is KCSAN_ACCESS_ASSERT.
191 if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) &&
192 value_change == KCSAN_VALUE_CHANGE_MAYBE) {
194 * The access is a write, but the data value did not change.
196 * We opt-out of this filter for certain functions at request of
/* scnprintf() returns the number of characters written; it bounds the searches below. */
200 int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame);
/* True only when the symbol name contains none of the RCU-related substrings. */
202 if (!strnstr(buf, "rcu_", len) &&
203 !strnstr(buf, "_rcu", len) &&
204 !strnstr(buf, "_srcu", len))
/* Remaining filtering is delegated to kcsan_skip_report_debugfs(). */
208 return kcsan_skip_report_debugfs(top_frame);
/*
 * get_access_type() - describe an access-type bitmask for the report text.
 * @type: combination of KCSAN_ACCESS_* flags.
 *
 * KCSAN_ACCESS_ASSERT is tested first: with KCSAN_ACCESS_WRITE the text is
 * "assert no accesses", otherwise "assert no writes", each with a
 * " (scoped)" suffix when KCSAN_ACCESS_SCOPED is also set. The case labels
 * that follow enumerate combinations of the ATOMIC, WRITE, COMPOUND and
 * SCOPED flags.
 *
 * NOTE(review): the switch expression and several return statements are not
 * visible in this listing.
 */
211 static const char *get_access_type(int type)
213 if (type & KCSAN_ACCESS_ASSERT) {
214 if (type & KCSAN_ACCESS_SCOPED) {
215 if (type & KCSAN_ACCESS_WRITE)
216 return "assert no accesses (scoped)";
218 return "assert no writes (scoped)";
220 if (type & KCSAN_ACCESS_WRITE)
221 return "assert no accesses";
223 return "assert no writes";
230 case KCSAN_ACCESS_ATOMIC:
231 return "read (marked)";
232 case KCSAN_ACCESS_WRITE:
234 case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
235 return "write (marked)";
236 case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE:
238 case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
239 return "read-write (marked)";
240 case KCSAN_ACCESS_SCOPED:
241 return "read (scoped)";
242 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
243 return "read (marked, scoped)";
244 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE:
245 return "write (scoped)";
246 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
247 return "write (marked, scoped)";
/*
 * get_bug_type() - choose the bug class shown in the report title.
 * @type: access-type flags; print_report() passes the OR of both threads'
 *        flags when two threads are involved.
 *
 * Returns "assert: race" if KCSAN_ACCESS_ASSERT is set, "data-race" otherwise.
 */
253 static const char *get_bug_type(int type)
255 return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race";
/*
 * get_thread_desc() - describe the context that performed an access.
 * @task_id: task PID; kcsan_report() stores -1 here when not in task context.
 *
 * The "task <pid>" text is formatted into a function-local static buffer, so
 * each call overwrites the result of the previous one.
 *
 * NOTE(review): the handling of @task_id == -1 and the return statements are
 * not visible in this listing.
 */
258 /* Return thread description: in task or interrupt. */
259 static const char *get_thread_desc(int task_id)
262 static char buf[32]; /* safe: protected by report_lock */
264 snprintf(buf, sizeof(buf), "task %i", task_id);
/*
 * get_stack_skipnr() - count the leading stack entries to hide from a report.
 * @stack_entries: saved stack trace.
 * @num_entries: number of entries in @stack_entries to examine.
 *
 * Each entry is symbolized with "%ps" and matched by substring. Callers use
 * the result both as an index into the same array (to pick the top frame)
 * and as an offset when printing the trace.
 *
 * NOTE(review): the statements that end the scan and return the count are
 * not visible in this listing.
 */
270 /* Helper to skip KCSAN-related functions in stack-trace. */
271 static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries)
277 for (skip = 0; skip < num_entries; ++skip) {
278 len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);
280 /* Never show tsan_* or {read,write}_once_size. */
281 if (strnstr(buf, "tsan_", len) ||
282 strnstr(buf, "_once_size", len))
/* Find "kcsan_" anywhere in the symbol, then inspect the text that follows it. */
285 cur = strnstr(buf, "kcsan_", len);
287 cur += strlen("kcsan_");
288 if (!str_has_prefix(cur, "test"))
289 continue; /* KCSAN runtime function. */
290 /* KCSAN related test. */
294 * No match for runtime functions -- @skip entries to skip to
295 * get to first frame of interest.
/*
 * sym_strcmp() - order two code addresses by their symbolized text.
 * @addr1: first address.
 * @addr2: second address.
 *
 * Both addresses are formatted with "%pS" into local buffers, which are then
 * compared with strncmp() bounded by sizeof(buf1). Returns the strncmp()
 * result: negative, zero or positive.
 */
303 /* Compares symbolized strings of addr1 and addr2. */
304 static int sym_strcmp(void *addr1, void *addr2)
309 snprintf(buf1, sizeof(buf1), "%pS", addr1);
310 snprintf(buf2, sizeof(buf2), "%pS", addr2);
312 return strncmp(buf1, buf2, sizeof(buf1));
/*
 * print_verbose_info() - print extra per-task debugging state for a report.
 * @task: task whose state is printed; print_report() passes either
 *        other_info->task or current.
 *
 * Calls kcsan_restore_irqtrace(), then prints the task's held locks and its
 * IRQ trace events.
 *
 * NOTE(review): other_info->task can be NULL (set_other_info_task_blocking()
 * resets it on its abort path); a NULL guard is not visible in this listing --
 * confirm one exists.
 */
315 static void print_verbose_info(struct task_struct *task)
320 /* Restore IRQ state trace for printing. */
321 kcsan_restore_irqtrace(task);
324 debug_show_held_locks(task);
325 print_irqtrace_events(task);
/*
 * print_report() - filter, rate-limit and print one race report.
 * @value_change: value-change verdict; applied only when filtering the other
 *                thread's frame.
 * @type: KCSAN_REPORT_RACE_SIGNAL prints both threads' accesses;
 *        KCSAN_REPORT_RACE_UNKNOWN_ORIGIN prints only the current thread's.
 * @ai: access info of the current thread.
 * @other_info: other thread's access info and stack trace; dereferenced only
 *              on the KCSAN_REPORT_RACE_SIGNAL paths.
 *
 * The current thread's stack is captured here with stack_trace_save(); the
 * other thread's stack comes from @other_info. Both are trimmed with
 * get_stack_skipnr() before being used for filtering, the title and printing.
 *
 * NOTE(review): the early-return statements after the skip_report() and
 * rate_limit_report() checks are not visible in this listing.
 */
329 * Returns true if a report was generated, false otherwise.
331 static bool print_report(enum kcsan_value_change value_change,
332 enum kcsan_report_type type,
333 const struct access_info *ai,
334 const struct other_info *other_info)
336 unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
337 int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
338 int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
/* First frame of interest of the current thread. */
339 unsigned long this_frame = stack_entries[skipnr];
/* Stays 0 unless @type is KCSAN_REPORT_RACE_SIGNAL. */
340 unsigned long other_frame = 0;
341 int other_skipnr = 0; /* silence uninit warnings */
344 * Must check report filter rules before starting to print.
/* The current thread's frame is always filtered with KCSAN_VALUE_CHANGE_TRUE. */
346 if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr]))
349 if (type == KCSAN_REPORT_RACE_SIGNAL) {
350 other_skipnr = get_stack_skipnr(other_info->stack_entries,
351 other_info->num_stack_entries);
352 other_frame = other_info->stack_entries[other_skipnr];
354 /* @value_change is only known for the other thread */
355 if (skip_report(value_change, other_frame))
359 if (rate_limit_report(this_frame, other_frame))
362 /* Print report header. */
363 pr_err("==================================================================\n");
365 case KCSAN_REPORT_RACE_SIGNAL: {
/*
 * The two frames are ordered lexicographically by symbol text, so the
 * title does not depend on which of the two threads prints the report.
 */
369 * Order functions lexographically for consistent bug titles.
370 * Do not print offset of functions to keep title short.
372 cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
373 pr_err("BUG: KCSAN: %s in %ps / %ps\n",
374 get_bug_type(ai->access_type | other_info->ai.access_type),
375 (void *)(cmp < 0 ? other_frame : this_frame),
376 (void *)(cmp < 0 ? this_frame : other_frame));
379 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
380 pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type),
390 /* Print information about the racing accesses. */
392 case KCSAN_REPORT_RACE_SIGNAL:
393 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
394 get_access_type(other_info->ai.access_type), other_info->ai.ptr,
395 other_info->ai.size, get_thread_desc(other_info->ai.task_pid),
396 other_info->ai.cpu_id);
398 /* Print the other thread's stack trace. */
399 stack_trace_print(other_info->stack_entries + other_skipnr,
400 other_info->num_stack_entries - other_skipnr,
403 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
404 print_verbose_info(other_info->task);
407 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
408 get_access_type(ai->access_type), ai->ptr, ai->size,
409 get_thread_desc(ai->task_pid), ai->cpu_id);
412 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
413 pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
414 get_access_type(ai->access_type), ai->ptr, ai->size,
415 get_thread_desc(ai->task_pid), ai->cpu_id);
421 /* Print stack trace of this thread. */
422 stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
425 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
426 print_verbose_info(current);
428 /* Print report footer. */
430 pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
431 dump_stack_print_info(KERN_DEFAULT);
432 pr_err("==================================================================\n");
/*
 * release_report() - mark @other_info as consumed and drop report_lock.
 * @flags: IRQ flags saved when report_lock was taken; restored on unlock.
 * @other_info: entry to invalidate by setting its ai.size to 0.
 *
 * NOTE(review): kcsan_report() passes a NULL @other_info for
 * KCSAN_REPORT_RACE_UNKNOWN_ORIGIN; the guard around the store below is not
 * visible in this listing -- confirm it exists.
 */
437 static void release_report(unsigned long *flags, struct other_info *other_info)
441 * Use size to denote valid/invalid, since KCSAN entirely
442 * ignores 0-sized accesses.
444 other_info->ai.size = 0;
446 raw_spin_unlock_irqrestore(&report_lock, *flags);
/*
 * set_other_info_task_blocking() - publish current in @other_info->task and
 * wait for the entry to be consumed.
 * @flags: IRQ flags storage used while dropping and re-taking report_lock.
 * @ai: access info of the calling thread; its ptr is compared with the
 *      entry's ptr to detect that the entry has been reused.
 * @other_info: entry being handed to the consuming thread.
 *
 * NOTE(review): the loop header, the delay between unlock and lock, and the
 * timeout bookkeeping are not visible in this listing.
 */
450 * Sets @other_info->task and awaits consumption of @other_info.
452 * Precondition: report_lock is held.
453 * Postcondition: report_lock is held.
455 static void set_other_info_task_blocking(unsigned long *flags,
456 const struct access_info *ai,
457 struct other_info *other_info)
460 * We may be instrumenting a code-path where current->state is already
461 * something other than TASK_RUNNING.
463 const bool is_running = current->state == TASK_RUNNING;
465 * To avoid deadlock in case we are in an interrupt here and this is a
466 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
467 * timeout to ensure this works in all contexts.
469 * Await approximately the worst case delay of the reporting thread (if
470 * we are not interrupted).
/* Wait budget: the larger of the two KCSAN delay settings. */
472 int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);
474 other_info->task = current;
478 * Let lockdep know the real task is sleeping, to print
479 * the held locks (recall we turned lockdep off, so
480 * locking/unlocking @report_lock won't be recorded).
482 set_current_state(TASK_UNINTERRUPTIBLE);
/* report_lock is dropped while waiting and re-taken before the entry is re-checked. */
484 raw_spin_unlock_irqrestore(&report_lock, *flags);
486 * We cannot call schedule() since we also cannot reliably
487 * determine if sleeping here is permitted -- see in_atomic().
491 raw_spin_lock_irqsave(&report_lock, *flags);
494 * Abort. Reset @other_info->task to NULL, since it
495 * appears the other thread is still going to consume
496 * it. It will result in no verbose info printed for
499 other_info->task = NULL;
/*
 * Keep waiting only while the entry is still valid (non-zero size), still
 * describes this access (same ptr) and still names this task. If any of
 * the three no longer holds, the entry has been consumed.
 */
503 * If invalid, or @ptr nor @current matches, then @other_info
504 * has been consumed and we may continue. If not, retry.
506 } while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
507 other_info->task == current);
509 set_current_state(TASK_RUNNING);
/*
 * prepare_report_producer() - fill @other_info for the thread that prints.
 * @flags: IRQ flags storage for report_lock.
 * @ai: access info of the calling thread; copied into @other_info->ai.
 * @other_info: entry to populate; expected to be unused (ai.size == 0).
 *
 * Takes report_lock, warns if the entry is still marked valid, copies *@ai,
 * saves the current stack trace into the entry and, with
 * CONFIG_KCSAN_VERBOSE, waits in set_other_info_task_blocking(). report_lock
 * is released before returning.
 */
512 /* Populate @other_info; requires that the provided @other_info is not in use. */
513 static void prepare_report_producer(unsigned long *flags,
514 const struct access_info *ai,
515 struct other_info *other_info)
517 raw_spin_lock_irqsave(&report_lock, *flags);
520 * The same @other_infos entry cannot be used concurrently, because
521 * there is a one-to-one mapping to watchpoint slots (@watchpoints in
522 * core.c), and a watchpoint is only released for reuse after reporting
523 * is done by the consumer of @other_info. Therefore, it is impossible
524 * for another concurrent prepare_report_producer() to set the same
525 * @other_info, and are guaranteed exclusivity for the @other_infos
526 * entry pointed to by @other_info.
528 * To check this property holds, size should never be non-zero here,
529 * because every consumer of struct other_info resets size to 0 in
532 WARN_ON(other_info->ai.size);
/* Copying *ai sets a non-zero ai.size, which prepare_report_consumer() polls for. */
534 other_info->ai = *ai;
535 other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);
537 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
538 set_other_info_task_blocking(flags, ai, other_info);
540 raw_spin_unlock_irqrestore(&report_lock, *flags);
/*
 * prepare_report_consumer() - wait for the producer's @other_info and
 * validate it against this thread's access.
 * @flags: IRQ flags storage for report_lock.
 * @ai: access info of the calling thread.
 * @other_info: entry the producer fills; valid once ai.size is non-zero.
 *
 * Polls @other_info->ai.size, dropping and re-taking report_lock on every
 * iteration. The two accesses are then compared twice with
 * matching_access(): first on addresses masked with WATCHPOINT_ADDR_MASK
 * (a mismatch triggers a warning), then on the unmasked addresses (a
 * mismatch is counted in KCSAN_COUNTER_ENCODING_FALSE_POSITIVES).
 *
 * NOTE(review): the return statements and the jump to the
 * release_report() call are not visible in this listing; presumably the
 * function returns true with report_lock held when the accesses match, and
 * releases the entry and returns false otherwise -- confirm.
 */
543 /* Awaits producer to fill @other_info and then returns. */
544 static bool prepare_report_consumer(unsigned long *flags,
545 const struct access_info *ai,
546 struct other_info *other_info)
549 raw_spin_lock_irqsave(&report_lock, *flags);
550 while (!other_info->ai.size) { /* Await valid @other_info. */
551 raw_spin_unlock_irqrestore(&report_lock, *flags);
553 raw_spin_lock_irqsave(&report_lock, *flags);
556 /* Should always have a matching access based on watchpoint encoding. */
557 if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
558 (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
561 if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
562 (unsigned long)ai->ptr, ai->size)) {
/* The actual accesses do not match: a false positive of the watchpoint encoding. */
564 * If the actual accesses to not match, this was a false
565 * positive due to watchpoint encoding.
567 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES]);
574 release_report(flags, other_info);
/*
 * prepare_report() - dispatch to the producer or consumer side by @type.
 * @flags: IRQ flags storage for report_lock.
 * @type: KCSAN_REPORT_CONSUMED_WATCHPOINT runs prepare_report_producer();
 *        KCSAN_REPORT_RACE_SIGNAL returns the result of
 *        prepare_report_consumer(); the remaining path only takes
 *        report_lock.
 * @ai: access info of the calling thread.
 * @other_info: entry shared with the other racing thread.
 *
 * NOTE(review): the switch statement, its remaining label and the return
 * statements of the first and last paths are not visible in this listing.
 */
579 * Depending on the report type either sets @other_info and returns false, or
580 * awaits @other_info and returns true. If @other_info is not required for the
581 * report type, simply acquires @report_lock and returns true.
583 static noinline bool prepare_report(unsigned long *flags,
584 enum kcsan_report_type type,
585 const struct access_info *ai,
586 struct other_info *other_info)
589 case KCSAN_REPORT_CONSUMED_WATCHPOINT:
590 prepare_report_producer(flags, ai, other_info);
592 case KCSAN_REPORT_RACE_SIGNAL:
593 return prepare_report_consumer(flags, ai, other_info);
595 /* @other_info not required; just acquire @report_lock. */
596 raw_spin_lock_irqsave(&report_lock, *flags);
/*
 * kcsan_report() - entry point for reporting a race on a watched access.
 * @ptr: address of the access.
 * @size: size of the access.
 * @access_type: KCSAN_ACCESS_* flags of the access; stored in ai.access_type.
 * @value_change: value-change verdict; KCSAN_VALUE_CHANGE_FALSE is never
 *                printed.
 * @type: report type; selects the producer or consumer role in
 *        prepare_report().
 * @watchpoint_idx: index into other_infos[]; warned about when negative or
 *                  not below ARRAY_SIZE(other_infos).
 *
 * Builds an access_info for the calling context (task PID, or -1 outside
 * task context, plus the current CPU id), and runs between
 * kcsan_disable_current() and kcsan_enable_current(). When prepare_report()
 * returns true, the report is printed via print_report() and the entry and
 * lock are released with release_report(). A printed report triggers panic()
 * if panic_on_warn is set.
 *
 * NOTE(review): the initializers that store @ptr and @size in ai, the
 * handling after a failed index check and the lockdep calls are not visible
 * in this listing.
 */
601 void kcsan_report(const volatile void *ptr, size_t size, int access_type,
602 enum kcsan_value_change value_change,
603 enum kcsan_report_type type, int watchpoint_idx)
605 unsigned long flags = 0;
606 const struct access_info ai = {
609 .access_type = access_type,
610 .task_pid = in_task() ? task_pid_nr(current) : -1,
611 .cpu_id = raw_smp_processor_id()
/*
 * NOTE(review): &other_infos[watchpoint_idx] is formed before the range
 * check below; the pointer is not dereferenced in this function before
 * that check, but confirm no path uses it after the check fails.
 */
613 struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
614 ? NULL : &other_infos[watchpoint_idx];
616 kcsan_disable_current();
617 if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
621 * Because we may generate reports when we're in scheduler code, the use
622 * of printk() could deadlock. Until such time that all printing code
623 * called in print_report() is scheduler-safe, accept the risk, and just
624 * get our message out. As such, also disable lockdep to hide the
625 * warning, and avoid disabling lockdep for the rest of the kernel.
629 if (prepare_report(&flags, type, &ai, other_info)) {
/* Print only when value_change is TRUE or MAYBE; FALSE short-circuits print_report(). */
631 * Never report if value_change is FALSE, only if we it is
632 * either TRUE or MAYBE. In case of MAYBE, further filtering may
633 * be done once we know the full stack trace in print_report().
635 bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE &&
636 print_report(value_change, type, &ai, other_info);
638 if (reported && panic_on_warn)
639 panic("panic_on_warn set ...\n");
641 release_report(&flags, other_info);
646 kcsan_enable_current();