1 #ifndef _LINUX_PSI_TYPES_H
2 #define _LINUX_PSI_TYPES_H
4 #include <linux/kthread.h>
5 #include <linux/seqlock.h>
6 #include <linux/types.h>
7 #include <linux/kref.h>
8 #include <linux/wait.h>
12 /* Tracked task states */
18 * This can't have values other than 0 or 1 and could be
19 * implemented as a bit flag. But for now we still have room
20 * in the first cacheline of psi_group_cpu, and this way we
21 * don't have to special case any state tracking for it.
25 * For IO and CPU stalls the presence of running/oncpu tasks
26 * in the domain means a partial rather than a full stall.
27 * For memory it's not so simple because of page reclaimers:
28 * they are running/oncpu while representing a stall. To tell
29 * whether a domain has productivity left or not, we need to
30 * distinguish between regular running (i.e. productive)
31 * threads and memstall ones.
34 NR_PSI_TASK_COUNTS = 5,
37 /* Task state bitmasks */
/*
 * One bit per psi_task_count counter, so a CPU's aggregate task state
 * can be carried in a single word. Set/cleared by the scheduler as
 * tasks enter and leave each state on a runqueue.
 */
38 #define TSK_IOWAIT (1 << NR_IOWAIT)
39 #define TSK_MEMSTALL (1 << NR_MEMSTALL)
40 #define TSK_RUNNING (1 << NR_RUNNING)
41 #define TSK_ONCPU (1 << NR_ONCPU)
42 #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
44 /* Resources that workloads could be stalled on */
53 * Pressure states for each resource:
55 * SOME: Stalled tasks & working tasks
56 * FULL: Stalled tasks & no working tasks
65 /* Only per-CPU, to weigh the CPU in the global average: */
70 enum psi_aggregators {
76 struct psi_group_cpu {
77 /* 1st cacheline updated by the scheduler */
79 /* Aggregator needs to know of concurrent changes */
80 seqcount_t seq ____cacheline_aligned_in_smp;
82 /* States of the tasks belonging to this group */
83 unsigned int tasks[NR_PSI_TASK_COUNTS];
85 /* Aggregate pressure state derived from the tasks */
88 /* Period time sampling buckets for each state of interest (ns) */
89 u32 times[NR_PSI_STATES];
91 /* Time of last task change in this group (rq_clock) */
94 /* 2nd cacheline updated by the aggregator */
96 /* Delta detection against the sampling buckets */
97 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
98 ____cacheline_aligned_in_smp;
101 /* PSI growth tracking window */
103 /* Window size in ns */
106 /* Start time of the current window in ns */
109 /* Value at the start of the window */
112 /* Value growth in the previous window */
117 /* PSI state being monitored by the trigger */
118 enum psi_states state;
120 /* User-specified threshold in ns */
123 /* List node inside triggers list */
124 struct list_head node;
126 /* Backpointer needed during trigger destruction */
127 struct psi_group *group;
129 /* Wait queue for polling */
130 wait_queue_head_t event_wait;
132 /* Pending event flag */
135 /* Tracking window */
136 struct psi_window win;
139 * Time last event was generated. Used for rate-limiting
140 * events to one per window
146 /* Protects data used by the aggregator */
147 struct mutex avgs_lock;
149 /* Per-cpu task state & time tracking */
150 struct psi_group_cpu __percpu *pcpu;
152 /* Running pressure averages */
153 u64 avg_total[NR_PSI_STATES - 1];
157 /* Aggregator work control */
158 struct delayed_work avgs_work;
160 /* Total stall times and sampled pressure averages */
161 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
162 unsigned long avg[NR_PSI_STATES - 1][3];
164 /* Monitor work control */
165 struct task_struct __rcu *poll_task;
166 struct timer_list poll_timer;
167 wait_queue_head_t poll_wait;
168 atomic_t poll_wakeup;
170 /* Protects data used by the monitor */
171 struct mutex trigger_lock;
173 /* Configured polling triggers */
174 struct list_head triggers;
175 u32 nr_triggers[NR_PSI_STATES - 1];
179 /* Total stall times at the start of monitor activation */
180 u64 polling_total[NR_PSI_STATES - 1];
181 u64 polling_next_update;
185 #else /* CONFIG_PSI */
/* Empty stub so psi_group can still be embedded/referenced when PSI is compiled out */
187 struct psi_group { };
189 #endif /* CONFIG_PSI */
191 #endif /* _LINUX_PSI_TYPES_H */