3 # Copyright (C) 2023 Tejun Heo <tj@kernel.org>
4 # Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
7 This is a drgn script to monitor workqueues. For more info on drgn, visit
8 https://github.com/osandov/drgn.
10 total Total number of work items executed by the workqueue.
12 infl The number of currently in-flight work items.
14 CPUtime Total CPU time consumed by the workqueue in seconds. This is
15 sampled from scheduler ticks and only provides ballpark
16 measurement. "nohz_full=" CPUs are excluded from measurement.
18 CPUitsv The number of times a concurrency-managed work item hogged CPU
19 longer than the threshold (workqueue.cpu_intensive_thresh_us)
20 and got excluded from concurrency management to avoid stalling
23 CMW/RPR For per-cpu workqueues, the number of concurrency-management
24 wake-ups while executing a work item of the workqueue. For
25 unbound workqueues, the number of times a worker was repatriated
26 to its affinity scope after being migrated to an off-scope CPU by
29 mayday The number of times the rescuer was requested while waiting for
32 rescued The number of work items executed by the rescuer.
43 from drgn.helpers.linux.list import list_for_each_entry,list_empty
44 from drgn.helpers.linux.cpumask import for_each_possible_cpu
# Command-line interface: optional workqueue name patterns, the polling
# interval, and a switch selecting JSON output.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description=desc,
)
parser.add_argument(
    'workqueue',
    nargs='*',
    metavar='REGEX',
    help='Target workqueue name patterns (all if empty)',
)
parser.add_argument(
    '-i', '--interval',
    type=float,
    default=1,
    metavar='SECS',
    help='Monitoring interval (0 to print once and exit)',
)
parser.add_argument(
    '-j', '--json',
    action='store_true',
    help='Output in json',
)
args = parser.parse_args()
58 print(s, file=sys.stderr, flush=True)
# Objects and constants looked up by name in the drgn program object `prog`.
# `workqueues` is later walked as a list head of 'struct workqueue_struct'
# entries.
workqueues = prog['workqueues']
# Bits tested against wq.flags to classify a workqueue.
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
# Indices into the per-pool_workqueue `stats` array.
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME'] # total CPU time consumed
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups
PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED'] # unbound workers brought back into scope
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer
# Number of entries in the stats array; used to size and iterate the totals.
PWQ_NR_STATS = prog['PWQ_NR_STATS']
def __init__(self, wq):
    """Snapshot the name, flags and summed statistics of one workqueue.

    wq is a drgn object for a workqueue; its name is decoded to str, the
    WQ_UNBOUND and WQ_MEM_RECLAIM bits of its flags are recorded as
    booleans, and every counter is summed over all pool_workqueues
    linked on wq.pwqs.
    """
    self.name = wq.name.string_().decode()
    self.unbound = (wq.flags & WQ_UNBOUND) != 0
    self.mem_reclaim = (wq.flags & WQ_MEM_RECLAIM) != 0

    # Accumulate into a local list and publish it once the walk is done.
    totals = [0] * PWQ_NR_STATS
    pwqs = list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node')
    for pwq in pwqs:
        for idx in range(PWQ_NR_STATS):
            totals[idx] += int(pwq.stats[idx])
    self.stats = totals
87 return { 'timestamp' : now,
89 'unbound' : self.unbound,
90 'mem_reclaim' : self.mem_reclaim,
91 'started' : self.stats[PWQ_STAT_STARTED],
92 'completed' : self.stats[PWQ_STAT_COMPLETED],
93 'cpu_time' : self.stats[PWQ_STAT_CPU_TIME],
94 'cpu_intensive' : self.stats[PWQ_STAT_CPU_INTENSIVE],
95 'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP],
96 'repatriated' : self.stats[PWQ_STAT_REPATRIATED],
97 'mayday' : self.stats[PWQ_STAT_MAYDAY],
98 'rescued' : self.stats[PWQ_STAT_RESCUED], }
def table_header_str():
    """Return the column-heading line for the table output.

    The first field is a blank 24-character column; every other label is
    right-aligned in its own fixed-width field, with single spaces between
    fields.
    """
    columns = (
        ("", 24),
        ("total", 8),
        ("infl", 5),
        ("CPUtime", 8),
        ("CPUitsv", 7),
        ("CMW/RPR", 7),
        ("mayday", 7),
        ("rescued", 7),
    )
    return ' '.join(f'{label:>{width}}' for label, width in columns)
104 def table_row_str(self):
111 cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED]);
113 cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
114 cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP])
117 mayday = str(self.stats[PWQ_STAT_MAYDAY])
118 rescued = str(self.stats[PWQ_STAT_RESCUED])
120 out = f'{self.name[-24:]:24} ' \
121 f'{self.stats[PWQ_STAT_STARTED]:8} ' \
122 f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
123 f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \
124 f'{cpu_intensive:>7} ' \
128 return out.rstrip(':')
132 def sigint_handler(signr, frame):
138 table_fmt = not args.json
139 interval = args.interval
143 for r in args.workqueue:
149 filter_re = re.compile(re_str) if re_str else None
152 signal.signal(signal.SIGINT, sigint_handler)
159 print(WqStats.table_header_str())
161 for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
163 if filter_re and not filter_re.search(stats.name):
166 print(stats.table_row_str())
168 print(stats.dict(now))
174 if __name__ == "__main__":