1 #ifndef _LINUX_PSI_TYPES_H
2 #define _LINUX_PSI_TYPES_H
4 #include <linux/kthread.h>
5 #include <linux/seqlock.h>
6 #include <linux/types.h>
7 #include <linux/kref.h>
8 #include <linux/wait.h>
12 /* Tracked task states */
18 * This can't have values other than 0 or 1 and could be
19 * implemented as a bit flag. But for now we still have room
20 * in the first cacheline of psi_group_cpu, and this way we
21 * don't have to special case any state tracking for it.
25 * For IO and CPU stalls the presence of running/oncpu tasks
26 * in the domain means a partial rather than a full stall.
27 * For memory it's not so simple because of page reclaimers:
28 * they are running/oncpu while representing a stall. To tell
29 * whether a domain has productivity left or not, we need to
30 * distinguish between regular running (i.e. productive)
31 * threads and memstall ones.
34 NR_PSI_TASK_COUNTS = 5,
37 /* Task state bitmasks */
/*
 * One bit per psi_task_count counter, so a CPU's aggregate task state
 * can be carried in a single word. Set/cleared by the scheduler as
 * tasks enter and leave each state on a runqueue.
 */
38 #define TSK_IOWAIT (1 << NR_IOWAIT)
39 #define TSK_MEMSTALL (1 << NR_MEMSTALL)
40 #define TSK_RUNNING (1 << NR_RUNNING)
41 #define TSK_ONCPU (1 << NR_ONCPU)
42 #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
44 /* Resources that workloads could be stalled on */
53 * Pressure states for each resource:
55 * SOME: Stalled tasks & working tasks
56 * FULL: Stalled tasks & no working tasks
65 /* Only per-CPU, to weigh the CPU in the global average: */
70 enum psi_aggregators {
76 struct psi_group_cpu {
77 /* 1st cacheline updated by the scheduler */
79 /* Aggregator needs to know of concurrent changes */
80 seqcount_t seq ____cacheline_aligned_in_smp;
82 /* States of the tasks belonging to this group */
83 unsigned int tasks[NR_PSI_TASK_COUNTS];
85 /* Aggregate pressure state derived from the tasks */
88 /* Period time sampling buckets for each state of interest (ns) */
89 u32 times[NR_PSI_STATES];
91 /* Time of last task change in this group (rq_clock) */
94 /* 2nd cacheline updated by the aggregator */
96 /* Delta detection against the sampling buckets */
97 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
98 ____cacheline_aligned_in_smp;
101 /* PSI growth tracking window */
103 /* Window size in ns */
106 /* Start time of the current window in ns */
109 /* Value at the start of the window */
112 /* Value growth in the previous window */
117 /* PSI state being monitored by the trigger */
118 enum psi_states state;
120 /* User-specified threshold in ns */
123 /* List node inside triggers list */
124 struct list_head node;
126 /* Backpointer needed during trigger destruction */
127 struct psi_group *group;
129 /* Wait queue for polling */
130 wait_queue_head_t event_wait;
132 /* Pending event flag */
135 /* Tracking window */
136 struct psi_window win;
139 * Time last event was generated. Used for rate-limiting
140 * events to one per window
146 /* Protects data used by the aggregator */
147 struct mutex avgs_lock;
149 /* Per-cpu task state & time tracking */
150 struct psi_group_cpu __percpu *pcpu;
152 /* Running pressure averages */
153 u64 avg_total[NR_PSI_STATES - 1];
157 /* Aggregator work control */
158 struct delayed_work avgs_work;
160 /* Total stall times and sampled pressure averages */
161 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
162 unsigned long avg[NR_PSI_STATES - 1][3];
164 /* Monitor work control */
165 struct task_struct __rcu *poll_task;
166 struct timer_list poll_timer;
167 wait_queue_head_t poll_wait;
168 atomic_t poll_wakeup;
170 /* Protects data used by the monitor */
171 struct mutex trigger_lock;
173 /* Configured polling triggers */
174 struct list_head triggers;
175 u32 nr_triggers[NR_PSI_STATES - 1];
179 /* Total stall times at the start of monitor activation */
180 u64 polling_total[NR_PSI_STATES - 1];
181 u64 polling_next_update;
185 #else /* CONFIG_PSI */
/* Empty stub so psi_group can still be embedded/referenced when PSI is compiled out */
187 struct psi_group { };
189 #endif /* CONFIG_PSI */
191 #endif /* _LINUX_PSI_TYPES_H */