kernel/sched/cputime.c

   1 #include <linux/export.h>
   2 #include <linux/sched.h>
   3 #include <linux/tsacct_kern.h>
   4 #include <linux/kernel_stat.h>
   5 #include <linux/static_key.h>
   6 #include <linux/context_tracking.h>
   7 #include "sched.h"
   8 #ifdef CONFIG_PARAVIRT
   9 #include <asm/paravirt.h>
  10 #endif
  11
  12
  13 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
  14
/*
 * Per-CPU irq time bookkeeping.
 *
 * There are no locks covering percpu hardirq/softirq time.
 * In this file they are only modified from irqtime_account_irq() (through
 * irqtime_account_delta()), on the corresponding CPU with interrupts
 * disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time, which can
 * race with irqtime_account_irq() on this CPU. We would either get the old
 * or the new value, with the side effect of accounting a slice of irq time
 * to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
  27
/*
 * Run-time switch for irq time accounting. While it is zero,
 * irqtime_account_irq() returns without doing anything and the tick
 * paths below do not take their irqtime_* branches.
 */
static int sched_clock_irqtime;

/* Switch irq time accounting on (sets sched_clock_irqtime to 1). */
void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

/* Switch irq time accounting off (sets sched_clock_irqtime to 0). */
void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}
  39
  40 static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
  41                                   enum cpu_usage_stat idx)
  42 {
  43         u64 *cpustat = kcpustat_this_cpu->cpustat;
  44
  45         u64_stats_update_begin(&irqtime->sync);
  46         cpustat[idx] += delta;
  47         irqtime->total += delta;
  48         irqtime->tick_delta += delta;
  49         u64_stats_update_end(&irqtime->sync);
  50 }
  51
  52 /*
  53  * Called before incrementing preempt_count on {soft,}irq_enter
  54  * and before decrementing preempt_count on {soft,}irq_exit.
  55  */
  56 void irqtime_account_irq(struct task_struct *curr)
  57 {
  58         struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
  59         s64 delta;
  60         int cpu;
  61
  62         if (!sched_clock_irqtime)
  63                 return;
  64
  65         cpu = smp_processor_id();
  66         delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
  67         irqtime->irq_start_time += delta;
  68
  69         /*
  70          * We do not account for softirq time from ksoftirqd here.
  71          * We want to continue accounting softirq time to ksoftirqd thread
  72          * in that case, so as not to confuse scheduler with a special task
  73          * that do not consume any time, but still wants to run.
  74          */
  75         if (hardirq_count())
  76                 irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
  77         else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
  78                 irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
  79 }
  80 EXPORT_SYMBOL_GPL(irqtime_account_irq);
  81
  82 static cputime_t irqtime_tick_accounted(cputime_t maxtime)
  83 {
  84         struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
  85         cputime_t delta;
  86
  87         delta = nsecs_to_cputime(irqtime->tick_delta);
  88         delta = min(delta, maxtime);
  89         irqtime->tick_delta -= cputime_to_nsecs(delta);
  90
  91         return delta;
  92 }
  93
  94 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
  95
/*
 * Without CONFIG_IRQ_TIME_ACCOUNTING the switch is a compile-time zero,
 * so every "if (sched_clock_irqtime)" test in this file is constant false.
 */
#define sched_clock_irqtime     (0)

/* Stub: no irq time is tracked, so nothing is ever consumed. */
static cputime_t irqtime_tick_accounted(cputime_t dummy)
{
        return 0;
}
 102
 103 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 104
/*
 * Add @tmp to cpustat slot @index, both in this CPU's kernel_cpustat
 * and, through cpuacct_account_field(), on behalf of task @p.
 * @p: the task the time belongs to
 * @index: cpustat slot to update
 * @tmp: amount to add
 */
static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
        /*
         * Since all updates are sure to touch the root cgroup, we
         * get ourselves ahead and touch it first. If the root cgroup
         * is the only cgroup, then nothing else should be necessary.
         *
         */
        __this_cpu_add(kernel_cpustat.cpustat[index], tmp);

        /* Hand the same amount to the cpuacct side for @p. */
        cpuacct_account_field(p, index, tmp);
}
 118
 119 /*
 120  * Account user cpu time to a process.
 121  * @p: the process that the cpu time gets accounted to
 122  * @cputime: the cpu time spent in user space since the last update
 123  * @cputime_scaled: cputime scaled by cpu frequency
 124  */
 125 void account_user_time(struct task_struct *p, cputime_t cputime,
 126                        cputime_t cputime_scaled)
 127 {
 128         int index;
 129
 130         /* Add user time to process. */
 131         p->utime += cputime;
 132         p->utimescaled += cputime_scaled;
 133         account_group_user_time(p, cputime);
 134
 135         index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
 136
 137         /* Add user time to cpustat. */
 138         task_group_account_field(p, index, cputime_to_nsecs(cputime));
 139
 140         /* Account for user time used */
 141         acct_account_cputime(p);
 142 }
 143
 144 /*
 145  * Account guest cpu time to a process.
 146  * @p: the process that the cpu time gets accounted to
 147  * @cputime: the cpu time spent in virtual machine since the last update
 148  * @cputime_scaled: cputime scaled by cpu frequency
 149  */
 150 static void account_guest_time(struct task_struct *p, cputime_t cputime,
 151                                cputime_t cputime_scaled)
 152 {
 153         u64 *cpustat = kcpustat_this_cpu->cpustat;
 154
 155         /* Add guest time to process. */
 156         p->utime += cputime;
 157         p->utimescaled += cputime_scaled;
 158         account_group_user_time(p, cputime);
 159         p->gtime += cputime;
 160
 161         /* Add guest time to cpustat. */
 162         if (task_nice(p) > 0) {
 163                 cpustat[CPUTIME_NICE] += cputime_to_nsecs(cputime);
 164                 cpustat[CPUTIME_GUEST_NICE] += cputime_to_nsecs(cputime);
 165         } else {
 166                 cpustat[CPUTIME_USER] += cputime_to_nsecs(cputime);
 167                 cpustat[CPUTIME_GUEST] += cputime_to_nsecs(cputime);
 168         }
 169 }
 170
 171 /*
 172  * Account system cpu time to a process and desired cpustat field
 173  * @p: the process that the cpu time gets accounted to
 174  * @cputime: the cpu time spent in kernel space since the last update
 175  * @cputime_scaled: cputime scaled by cpu frequency
 176  * @target_cputime64: pointer to cpustat field that has to be updated
 177  */
 178 static inline
 179 void __account_system_time(struct task_struct *p, cputime_t cputime,
 180                         cputime_t cputime_scaled, int index)
 181 {
 182         /* Add system time to process. */
 183         p->stime += cputime;
 184         p->stimescaled += cputime_scaled;
 185         account_group_system_time(p, cputime);
 186
 187         /* Add system time to cpustat. */
 188         task_group_account_field(p, index, cputime_to_nsecs(cputime));
 189
 190         /* Account for system time used */
 191         acct_account_cputime(p);
 192 }
 193
 194 /*
 195  * Account system cpu time to a process.
 196  * @p: the process that the cpu time gets accounted to
 197  * @hardirq_offset: the offset to subtract from hardirq_count()
 198  * @cputime: the cpu time spent in kernel space since the last update
 199  * @cputime_scaled: cputime scaled by cpu frequency
 200  */
 201 void account_system_time(struct task_struct *p, int hardirq_offset,
 202                          cputime_t cputime, cputime_t cputime_scaled)
 203 {
 204         int index;
 205
 206         if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
 207                 account_guest_time(p, cputime, cputime_scaled);
 208                 return;
 209         }
 210
 211         if (hardirq_count() - hardirq_offset)
 212                 index = CPUTIME_IRQ;
 213         else if (in_serving_softirq())
 214                 index = CPUTIME_SOFTIRQ;
 215         else
 216                 index = CPUTIME_SYSTEM;
 217
 218         __account_system_time(p, cputime, cputime_scaled, index);
 219 }
 220
 221 /*
 222  * Account for involuntary wait time.
 223  * @cputime: the cpu time spent in involuntary wait
 224  */
 225 void account_steal_time(cputime_t cputime)
 226 {
 227         u64 *cpustat = kcpustat_this_cpu->cpustat;
 228
 229         cpustat[CPUTIME_STEAL] += cputime_to_nsecs(cputime);
 230 }
 231
 232 /*
 233  * Account for idle time.
 234  * @cputime: the cpu time spent in idle wait
 235  */
 236 void account_idle_time(cputime_t cputime)
 237 {
 238         u64 *cpustat = kcpustat_this_cpu->cpustat;
 239         struct rq *rq = this_rq();
 240
 241         if (atomic_read(&rq->nr_iowait) > 0)
 242                 cpustat[CPUTIME_IOWAIT] += cputime_to_nsecs(cputime);
 243         else
 244                 cpustat[CPUTIME_IDLE] += cputime_to_nsecs(cputime);
 245 }
 246
 247 /*
 248  * When a guest is interrupted for a longer amount of time, missed clock
 249  * ticks are not redelivered later. Due to that, this function may on
 250  * occasion account more time than the calling functions think elapsed.
 251  */
 252 static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 253 {
 254 #ifdef CONFIG_PARAVIRT
 255         if (static_key_false(&paravirt_steal_enabled)) {
 256                 cputime_t steal_cputime;
 257                 u64 steal;
 258
 259                 steal = paravirt_steal_clock(smp_processor_id());
 260                 steal -= this_rq()->prev_steal_time;
 261
 262                 steal_cputime = min(nsecs_to_cputime(steal), maxtime);
 263                 account_steal_time(steal_cputime);
 264                 this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);
 265
 266                 return steal_cputime;
 267         }
 268 #endif
 269         return 0;
 270 }
 271
 272 /*
 273  * Account how much elapsed time was spent in steal, irq, or softirq time.
 274  */
 275 static inline cputime_t account_other_time(cputime_t max)
 276 {
 277         cputime_t accounted;
 278
 279         /* Shall be converted to a lockdep-enabled lightweight check */
 280         WARN_ON_ONCE(!irqs_disabled());
 281
 282         accounted = steal_account_process_time(max);
 283
 284         if (accounted < max)
 285                 accounted += irqtime_tick_accounted(max - accounted);
 286
 287         return accounted;
 288 }
 289
 290 #ifdef CONFIG_64BIT
 291 static inline u64 read_sum_exec_runtime(struct task_struct *t)
 292 {
 293         return t->se.sum_exec_runtime;
 294 }
 295 #else
 296 static u64 read_sum_exec_runtime(struct task_struct *t)
 297 {
 298         u64 ns;
 299         struct rq_flags rf;
 300         struct rq *rq;
 301
 302         rq = task_rq_lock(t, &rf);
 303         ns = t->se.sum_exec_runtime;
 304         task_rq_unlock(rq, t, &rf);
 305
 306         return ns;
 307 }
 308 #endif
 309
 310 /*
 311  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 312  * tasks (sum on group iteration) belonging to @tsk's group.
 313  */
 314 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 315 {
 316         struct signal_struct *sig = tsk->signal;
 317         cputime_t utime, stime;
 318         struct task_struct *t;
 319         unsigned int seq, nextseq;
 320         unsigned long flags;
 321
 322         /*
 323          * Update current task runtime to account pending time since last
 324          * scheduler action or thread_group_cputime() call. This thread group
 325          * might have other running tasks on different CPUs, but updating
 326          * their runtime can affect syscall performance, so we skip account
 327          * those pending times and rely only on values updated on tick or
 328          * other scheduler action.
 329          */
 330         if (same_thread_group(current, tsk))
 331                 (void) task_sched_runtime(current);
 332
 333         rcu_read_lock();
 334         /* Attempt a lockless read on the first round. */
 335         nextseq = 0;
 336         do {
 337                 seq = nextseq;
 338                 flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
 339                 times->utime = sig->utime;
 340                 times->stime = sig->stime;
 341                 times->sum_exec_runtime = sig->sum_sched_runtime;
 342
 343                 for_each_thread(tsk, t) {
 344                         task_cputime(t, &utime, &stime);
 345                         times->utime += utime;
 346                         times->stime += stime;
 347                         times->sum_exec_runtime += read_sum_exec_runtime(t);
 348                 }
 349                 /* If lockless access failed, take the lock. */
 350                 nextseq = 1;
 351         } while (need_seqretry(&sig->stats_lock, seq));
 352         done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
 353         rcu_read_unlock();
 354 }
 355
 356 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 357 /*
 358  * Account a tick to a process and cpustat
 359  * @p: the process that the cpu time gets accounted to
 360  * @user_tick: is the tick from userspace
 361  * @rq: the pointer to rq
 362  *
 363  * Tick demultiplexing follows the order
 364  * - pending hardirq update
 365  * - pending softirq update
 366  * - user_time
 367  * - idle_time
 368  * - system time
 369  *   - check for guest_time
 370  *   - else account as system_time
 371  *
 372  * Check for hardirq is done both for system and user time as there is
 373  * no timer going off while we are on hardirq and hence we may never get an
 374  * opportunity to update it solely in system time.
 375  * p->stime and friends are only updated on system time and not on irq
 376  * softirq as those do not count in task exec_runtime any more.
 377  */
 378 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 379                                          struct rq *rq, int ticks)
 380 {
 381         u64 cputime = (__force u64) cputime_one_jiffy * ticks;
 382         cputime_t scaled, other;
 383
 384         /*
 385          * When returning from idle, many ticks can get accounted at
 386          * once, including some ticks of steal, irq, and softirq time.
 387          * Subtract those ticks from the amount of time accounted to
 388          * idle, or potentially user or system time. Due to rounding,
 389          * other time can exceed ticks occasionally.
 390          */
 391         other = account_other_time(ULONG_MAX);
 392         if (other >= cputime)
 393                 return;
 394         cputime -= other;
 395         scaled = cputime_to_scaled(cputime);
 396
 397         if (this_cpu_ksoftirqd() == p) {
 398                 /*
 399                  * ksoftirqd time do not get accounted in cpu_softirq_time.
 400                  * So, we have to handle it separately here.
 401                  * Also, p->stime needs to be updated for ksoftirqd.
 402                  */
 403                 __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
 404         } else if (user_tick) {
 405                 account_user_time(p, cputime, scaled);
 406         } else if (p == rq->idle) {
 407                 account_idle_time(cputime);
 408         } else if (p->flags & PF_VCPU) { /* System time or guest time */
 409                 account_guest_time(p, cputime, scaled);
 410         } else {
 411                 __account_system_time(p, cputime, scaled,       CPUTIME_SYSTEM);
 412         }
 413 }
 414
 415 static void irqtime_account_idle_ticks(int ticks)
 416 {
 417         struct rq *rq = this_rq();
 418
 419         irqtime_account_process_tick(current, 0, rq, ticks);
 420 }
 421 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
/*
 * Empty stand-ins used when CONFIG_IRQ_TIME_ACCOUNTING is off, so callers
 * need no #ifdef of their own.
 */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                struct rq *rq, int nr_ticks) {}
 425 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 426
 427 /*
 428  * Use precise platform statistics if available:
 429  */
 430 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 431
 432 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 433 void vtime_common_task_switch(struct task_struct *prev)
 434 {
 435         if (is_idle_task(prev))
 436                 vtime_account_idle(prev);
 437         else
 438                 vtime_account_system(prev);
 439
 440 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 441         vtime_account_user(prev);
 442 #endif
 443         arch_vtime_task_switch(prev);
 444 }
 445 #endif
 446
 447 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 448
 449
 450 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * have a different meaning of idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account_irq_enter() by defining __ARCH_HAS_VTIME_ACCOUNT.
 */
 459 #ifndef __ARCH_HAS_VTIME_ACCOUNT
 460 void vtime_account_irq_enter(struct task_struct *tsk)
 461 {
 462         if (!in_interrupt() && is_idle_task(tsk))
 463                 vtime_account_idle(tsk);
 464         else
 465                 vtime_account_system(tsk);
 466 }
 467 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 468 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 469
/*
 * Report @p's user and system time.
 * @p: the task to query
 * @ut: receives p->utime
 * @st: receives p->stime
 *
 * In this (CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) variant the raw values are
 * returned without any adjustment.
 */
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        *ut = p->utime;
        *st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);
 476
 477 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 478 {
 479         struct task_cputime cputime;
 480
 481         thread_group_cputime(p, &cputime);
 482
 483         *ut = cputime.utime;
 484         *st = cputime.stime;
 485 }
 486 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 487 /*
 488  * Account a single tick of cpu time.
 489  * @p: the process that the cpu time gets accounted to
 490  * @user_tick: indicates if the tick is a user or a system tick
 491  */
 492 void account_process_tick(struct task_struct *p, int user_tick)
 493 {
 494         cputime_t cputime, scaled, steal;
 495         struct rq *rq = this_rq();
 496
 497         if (vtime_accounting_cpu_enabled())
 498                 return;
 499
 500         if (sched_clock_irqtime) {
 501                 irqtime_account_process_tick(p, user_tick, rq, 1);
 502                 return;
 503         }
 504
 505         cputime = cputime_one_jiffy;
 506         steal = steal_account_process_time(ULONG_MAX);
 507
 508         if (steal >= cputime)
 509                 return;
 510
 511         cputime -= steal;
 512         scaled = cputime_to_scaled(cputime);
 513
 514         if (user_tick)
 515                 account_user_time(p, cputime, scaled);
 516         else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
 517                 account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
 518         else
 519                 account_idle_time(cputime);
 520 }
 521
 522 /*
 523  * Account multiple ticks of idle time.
 524  * @ticks: number of stolen ticks
 525  */
 526 void account_idle_ticks(unsigned long ticks)
 527 {
 528         cputime_t cputime, steal;
 529
 530         if (sched_clock_irqtime) {
 531                 irqtime_account_idle_ticks(ticks);
 532                 return;
 533         }
 534
 535         cputime = jiffies_to_cputime(ticks);
 536         steal = steal_account_process_time(ULONG_MAX);
 537
 538         if (steal >= cputime)
 539                 return;
 540
 541         cputime -= steal;
 542         account_idle_time(cputime);
 543 }
 544
 545 /*
 546  * Perform (stime * rtime) / total, but avoid multiplication overflow by
 547  * loosing precision when the numbers are big.
 548  */
 549 static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
 550 {
 551         u64 scaled;
 552
 553         for (;;) {
 554                 /* Make sure "rtime" is the bigger of stime/rtime */
 555                 if (stime > rtime)
 556                         swap(rtime, stime);
 557
 558                 /* Make sure 'total' fits in 32 bits */
 559                 if (total >> 32)
 560                         goto drop_precision;
 561
 562                 /* Does rtime (and thus stime) fit in 32 bits? */
 563                 if (!(rtime >> 32))
 564                         break;
 565
 566                 /* Can we just balance rtime/stime rather than dropping bits? */
 567                 if (stime >> 31)
 568                         goto drop_precision;
 569
 570                 /* We can grow stime and shrink rtime and try to make them both fit */
 571                 stime <<= 1;
 572                 rtime >>= 1;
 573                 continue;
 574
 575 drop_precision:
 576                 /* We drop from rtime, it has more bits than stime */
 577                 rtime >>= 1;
 578                 total >>= 1;
 579         }
 580
 581         /*
 582          * Make sure gcc understands that this is a 32x32->64 multiply,
 583          * followed by a 64/32->64 divide.
 584          */
 585         scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
 586         return (__force cputime_t) scaled;
 587 }
 588
 589 /*
 590  * Adjust tick based cputime random precision against scheduler runtime
 591  * accounting.
 592  *
 593  * Tick based cputime accounting depend on random scheduling timeslices of a
 594  * task to be interrupted or not by the timer.  Depending on these
 595  * circumstances, the number of these interrupts may be over or
 596  * under-optimistic, matching the real user and system cputime with a variable
 597  * precision.
 598  *
 599  * Fix this by scaling these tick based values against the total runtime
 600  * accounted by the CFS scheduler.
 601  *
 602  * This code provides the following guarantees:
 603  *
 604  *   stime + utime == rtime
 605  *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 606  *
 607  * Assuming that rtime_i+1 >= rtime_i.
 608  */
 609 static void cputime_adjust(struct task_cputime *curr,
 610                            struct prev_cputime *prev,
 611                            cputime_t *ut, cputime_t *st)
 612 {
 613         cputime_t rtime, stime, utime;
 614         unsigned long flags;
 615
 616         /* Serialize concurrent callers such that we can honour our guarantees */
 617         raw_spin_lock_irqsave(&prev->lock, flags);
 618         rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 619
 620         /*
 621          * This is possible under two circumstances:
 622          *  - rtime isn't monotonic after all (a bug);
 623          *  - we got reordered by the lock.
 624          *
 625          * In both cases this acts as a filter such that the rest of the code
 626          * can assume it is monotonic regardless of anything else.
 627          */
 628         if (prev->stime + prev->utime >= rtime)
 629                 goto out;
 630
 631         stime = curr->stime;
 632         utime = curr->utime;
 633
 634         /*
 635          * If either stime or both stime and utime are 0, assume all runtime is
 636          * userspace. Once a task gets some ticks, the monotonicy code at
 637          * 'update' will ensure things converge to the observed ratio.
 638          */
 639         if (stime == 0) {
 640                 utime = rtime;
 641                 goto update;
 642         }
 643
 644         if (utime == 0) {
 645                 stime = rtime;
 646                 goto update;
 647         }
 648
 649         stime = scale_stime((__force u64)stime, (__force u64)rtime,
 650                             (__force u64)(stime + utime));
 651
 652 update:
 653         /*
 654          * Make sure stime doesn't go backwards; this preserves monotonicity
 655          * for utime because rtime is monotonic.
 656          *
 657          *  utime_i+1 = rtime_i+1 - stime_i
 658          *            = rtime_i+1 - (rtime_i - utime_i)
 659          *            = (rtime_i+1 - rtime_i) + utime_i
 660          *            >= utime_i
 661          */
 662         if (stime < prev->stime)
 663                 stime = prev->stime;
 664         utime = rtime - stime;
 665
 666         /*
 667          * Make sure utime doesn't go backwards; this still preserves
 668          * monotonicity for stime, analogous argument to above.
 669          */
 670         if (utime < prev->utime) {
 671                 utime = prev->utime;
 672                 stime = rtime - utime;
 673         }
 674
 675         prev->stime = stime;
 676         prev->utime = utime;
 677 out:
 678         *ut = prev->utime;
 679         *st = prev->stime;
 680         raw_spin_unlock_irqrestore(&prev->lock, flags);
 681 }
 682
 683 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 684 {
 685         struct task_cputime cputime = {
 686                 .sum_exec_runtime = p->se.sum_exec_runtime,
 687         };
 688
 689         task_cputime(p, &cputime.utime, &cputime.stime);
 690         cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 691 }
 692 EXPORT_SYMBOL_GPL(task_cputime_adjusted);
 693
 694 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 695 {
 696         struct task_cputime cputime;
 697
 698         thread_group_cputime(p, &cputime);
 699         cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 700 }
 701 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 702
 703 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 704 static cputime_t vtime_delta(struct task_struct *tsk)
 705 {
 706         unsigned long now = READ_ONCE(jiffies);
 707
 708         if (time_before(now, (unsigned long)tsk->vtime_snap))
 709                 return 0;
 710
 711         return jiffies_to_cputime(now - tsk->vtime_snap);
 712 }
 713
 714 static cputime_t get_vtime_delta(struct task_struct *tsk)
 715 {
 716         unsigned long now = READ_ONCE(jiffies);
 717         cputime_t delta, other;
 718
 719         /*
 720          * Unlike tick based timing, vtime based timing never has lost
 721          * ticks, and no need for steal time accounting to make up for
 722          * lost ticks. Vtime accounts a rounded version of actual
 723          * elapsed time. Limit account_other_time to prevent rounding
 724          * errors from causing elapsed vtime to go negative.
 725          */
 726         delta = jiffies_to_cputime(now - tsk->vtime_snap);
 727         other = account_other_time(delta);
 728         WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
 729         tsk->vtime_snap = now;
 730
 731         return delta - other;
 732 }
 733
 734 static void __vtime_account_system(struct task_struct *tsk)
 735 {
 736         cputime_t delta_cpu = get_vtime_delta(tsk);
 737
 738         account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
 739 }
 740
 741 void vtime_account_system(struct task_struct *tsk)
 742 {
 743         if (!vtime_delta(tsk))
 744                 return;
 745
 746         write_seqcount_begin(&tsk->vtime_seqcount);
 747         __vtime_account_system(tsk);
 748         write_seqcount_end(&tsk->vtime_seqcount);
 749 }
 750
 751 void vtime_account_user(struct task_struct *tsk)
 752 {
 753         cputime_t delta_cpu;
 754
 755         write_seqcount_begin(&tsk->vtime_seqcount);
 756         tsk->vtime_snap_whence = VTIME_SYS;
 757         if (vtime_delta(tsk)) {
 758                 delta_cpu = get_vtime_delta(tsk);
 759                 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
 760         }
 761         write_seqcount_end(&tsk->vtime_seqcount);
 762 }
 763
 764 void vtime_user_enter(struct task_struct *tsk)
 765 {
 766         write_seqcount_begin(&tsk->vtime_seqcount);
 767         if (vtime_delta(tsk))
 768                 __vtime_account_system(tsk);
 769         tsk->vtime_snap_whence = VTIME_USER;
 770         write_seqcount_end(&tsk->vtime_seqcount);
 771 }
 772
 773 void vtime_guest_enter(struct task_struct *tsk)
 774 {
 775         /*
 776          * The flags must be updated under the lock with
 777          * the vtime_snap flush and update.
 778          * That enforces a right ordering and update sequence
 779          * synchronization against the reader (task_gtime())
 780          * that can thus safely catch up with a tickless delta.
 781          */
 782         write_seqcount_begin(&tsk->vtime_seqcount);
 783         if (vtime_delta(tsk))
 784                 __vtime_account_system(tsk);
 785         current->flags |= PF_VCPU;
 786         write_seqcount_end(&tsk->vtime_seqcount);
 787 }
 788 EXPORT_SYMBOL_GPL(vtime_guest_enter);
 789
 790 void vtime_guest_exit(struct task_struct *tsk)
 791 {
 792         write_seqcount_begin(&tsk->vtime_seqcount);
 793         __vtime_account_system(tsk);
 794         current->flags &= ~PF_VCPU;
 795         write_seqcount_end(&tsk->vtime_seqcount);
 796 }
 797 EXPORT_SYMBOL_GPL(vtime_guest_exit);
 798
 799 void vtime_account_idle(struct task_struct *tsk)
 800 {
 801         cputime_t delta_cpu = get_vtime_delta(tsk);
 802
 803         account_idle_time(delta_cpu);
 804 }
 805
 806 void arch_vtime_task_switch(struct task_struct *prev)
 807 {
 808         write_seqcount_begin(&prev->vtime_seqcount);
 809         prev->vtime_snap_whence = VTIME_INACTIVE;
 810         write_seqcount_end(&prev->vtime_seqcount);
 811
 812         write_seqcount_begin(&current->vtime_seqcount);
 813         current->vtime_snap_whence = VTIME_SYS;
 814         current->vtime_snap = jiffies;
 815         write_seqcount_end(&current->vtime_seqcount);
 816 }
 817
 818 void vtime_init_idle(struct task_struct *t, int cpu)
 819 {
 820         unsigned long flags;
 821
 822         local_irq_save(flags);
 823         write_seqcount_begin(&t->vtime_seqcount);
 824         t->vtime_snap_whence = VTIME_SYS;
 825         t->vtime_snap = jiffies;
 826         write_seqcount_end(&t->vtime_seqcount);
 827         local_irq_restore(flags);
 828 }
 829
 830 cputime_t task_gtime(struct task_struct *t)
 831 {
 832         unsigned int seq;
 833         cputime_t gtime;
 834
 835         if (!vtime_accounting_enabled())
 836                 return t->gtime;
 837
 838         do {
 839                 seq = read_seqcount_begin(&t->vtime_seqcount);
 840
 841                 gtime = t->gtime;
 842                 if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
 843                         gtime += vtime_delta(t);
 844
 845         } while (read_seqcount_retry(&t->vtime_seqcount, seq));
 846
 847         return gtime;
 848 }
 849
 850 /*
 851  * Fetch cputime raw values from fields of task_struct and
 852  * add up the pending nohz execution time since the last
 853  * cputime snapshot.
 854  */
 855 static void
 856 fetch_task_cputime(struct task_struct *t,
 857                    cputime_t *u_dst, cputime_t *s_dst,
 858                    cputime_t *u_src, cputime_t *s_src,
 859                    cputime_t *udelta, cputime_t *sdelta)
 860 {
 861         unsigned int seq;
 862         unsigned long long delta;
 863
 864         do {
 865                 *udelta = 0;
 866                 *sdelta = 0;
 867
 868                 seq = read_seqcount_begin(&t->vtime_seqcount);
 869
 870                 if (u_dst)
 871                         *u_dst = *u_src;
 872                 if (s_dst)
 873                         *s_dst = *s_src;
 874
 875                 /* Task is sleeping, nothing to add */
 876                 if (t->vtime_snap_whence == VTIME_INACTIVE ||
 877                     is_idle_task(t))
 878                         continue;
 879
 880                 delta = vtime_delta(t);
 881
 882                 /*
 883                  * Task runs either in user or kernel space, add pending nohz time to
 884                  * the right place.
 885                  */
 886                 if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
 887                         *udelta = delta;
 888                 } else {
 889                         if (t->vtime_snap_whence == VTIME_SYS)
 890                                 *sdelta = delta;
 891                 }
 892         } while (read_seqcount_retry(&t->vtime_seqcount, seq));
 893 }
 894
 895
 896 void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
 897 {
 898         cputime_t udelta, sdelta;
 899
 900         if (!vtime_accounting_enabled()) {
 901                 if (utime)
 902                         *utime = t->utime;
 903                 if (stime)
 904                         *stime = t->stime;
 905                 return;
 906         }
 907
 908         fetch_task_cputime(t, utime, stime, &t->utime,
 909                            &t->stime, &udelta, &sdelta);
 910         if (utime)
 911                 *utime += udelta;
 912         if (stime)
 913                 *stime += sdelta;
 914 }
 915
 916 void task_cputime_scaled(struct task_struct *t,
 917                          cputime_t *utimescaled, cputime_t *stimescaled)
 918 {
 919         cputime_t udelta, sdelta;
 920
 921         if (!vtime_accounting_enabled()) {
 922                 if (utimescaled)
 923                         *utimescaled = t->utimescaled;
 924                 if (stimescaled)
 925                         *stimescaled = t->stimescaled;
 926                 return;
 927         }
 928
 929         fetch_task_cputime(t, utimescaled, stimescaled,
 930                            &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
 931         if (utimescaled)
 932                 *utimescaled += cputime_to_scaled(udelta);
 933         if (stimescaled)
 934                 *stimescaled += cputime_to_scaled(sdelta);
 935 }
 936 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */