kernel/bpf/verifier.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3  * Copyright (c) 2016 Facebook
   4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
   5  */
   6 #include <uapi/linux/btf.h>
   7 #include <linux/kernel.h>
   8 #include <linux/types.h>
   9 #include <linux/slab.h>
  10 #include <linux/bpf.h>
  11 #include <linux/btf.h>
  12 #include <linux/bpf_verifier.h>
  13 #include <linux/filter.h>
  14 #include <net/netlink.h>
  15 #include <linux/file.h>
  16 #include <linux/vmalloc.h>
  17 #include <linux/stringify.h>
  18 #include <linux/bsearch.h>
  19 #include <linux/sort.h>
  20 #include <linux/perf_event.h>
  21 #include <linux/ctype.h>
  22
  23 #include "disasm.h"
  24
  25 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  26 #define BPF_PROG_TYPE(_id, _name) \
  27         [_id] = & _name ## _verifier_ops,
  28 #define BPF_MAP_TYPE(_id, _ops)
  29 #include <linux/bpf_types.h>
  30 #undef BPF_PROG_TYPE
  31 #undef BPF_MAP_TYPE
  32 };
  33
  34 /* bpf_check() is a static code analyzer that walks eBPF program
  35  * instruction by instruction and updates register/stack state.
  36  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  37  *
  38  * The first pass is depth-first-search to check that the program is a DAG.
  39  * It rejects the following programs:
  40  * - larger than BPF_MAXINSNS insns
  41  * - if loop is present (detected via back-edge)
  42  * - unreachable insns exist (shouldn't be a forest. program = one function)
  43  * - out of bounds or malformed jumps
  44  * The second pass is all possible path descent from the 1st insn.
  45  * Since it's analyzing all pathes through the program, the length of the
  46  * analysis is limited to 64k insn, which may be hit even if total number of
  47  * insn is less then 4K, but there are too many branches that change stack/regs.
  48  * Number of 'branches to be analyzed' is limited to 1k
  49  *
  50  * On entry to each instruction, each register has a type, and the instruction
  51  * changes the types of the registers depending on instruction semantics.
  52  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  53  * copied to R1.
  54  *
  55  * All registers are 64-bit.
  56  * R0 - return register
  57  * R1-R5 argument passing registers
  58  * R6-R9 callee saved registers
  59  * R10 - frame pointer read-only
  60  *
  61  * At the start of BPF program the register R1 contains a pointer to bpf_context
  62  * and has type PTR_TO_CTX.
  63  *
  64  * Verifier tracks arithmetic operations on pointers in case:
  65  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  66  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  67  * 1st insn copies R10 (which has FRAME_PTR) type into R1
  68  * and 2nd arithmetic instruction is pattern matched to recognize
  69  * that it wants to construct a pointer to some element within stack.
  70  * So after 2nd insn, the register R1 has type PTR_TO_STACK
  71  * (and -20 constant is saved for further stack bounds checking).
  72  * Meaning that this reg is a pointer to stack plus known immediate constant.
  73  *
  74  * Most of the time the registers have SCALAR_VALUE type, which
  75  * means the register has some value, but it's not a valid pointer.
  76  * (like pointer plus pointer becomes SCALAR_VALUE type)
  77  *
  78  * When verifier sees load or store instructions the type of base register
  79  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
  80  * four pointer types recognized by check_mem_access() function.
  81  *
  82  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  83  * and the range of [ptr, ptr + map's value_size) is accessible.
  84  *
  85  * registers used to pass values to function calls are checked against
  86  * function argument constraints.
  87  *
  88  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  89  * It means that the register type passed to this function must be
  90  * PTR_TO_STACK and it will be used inside the function as
  91  * 'pointer to map element key'
  92  *
  93  * For example the argument constraints for bpf_map_lookup_elem():
  94  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
  95  *   .arg1_type = ARG_CONST_MAP_PTR,
  96  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
  97  *
  98  * ret_type says that this function returns 'pointer to map elem value or null'
  99  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 100  * 2nd argument should be a pointer to stack, which will be used inside
 101  * the helper function as a pointer to map element key.
 102  *
 103  * On the kernel side the helper function looks like:
 104  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 105  * {
 106  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 107  *    void *key = (void *) (unsigned long) r2;
 108  *    void *value;
 109  *
 110  *    here kernel can access 'key' and 'map' pointers safely, knowing that
 111  *    [key, key + map->key_size) bytes are valid and were initialized on
 112  *    the stack of eBPF program.
 113  * }
 114  *
 115  * Corresponding eBPF program may look like:
 116  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 117  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 118  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 119  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 120  * here verifier looks at prototype of map_lookup_elem() and sees:
 121  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 122  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 123  *
 124  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 125  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 126  * and were initialized prior to this call.
 127  * If it's ok, then verifier allows this BPF_CALL insn and looks at
 128  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 129  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 130  * returns ether pointer to map value or NULL.
 131  *
 132  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 133  * insn, the register holding that pointer in the true branch changes state to
 134  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 135  * branch. See check_cond_jmp_op().
 136  *
 137  * After the call R0 is set to return type of the function and registers R1-R5
 138  * are set to NOT_INIT to indicate that they are no longer readable.
 139  *
 140  * The following reference types represent a potential reference to a kernel
 141  * resource which, after first being allocated, must be checked and freed by
 142  * the BPF program:
 143  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 144  *
 145  * When the verifier sees a helper call return a reference type, it allocates a
 146  * pointer id for the reference and stores it in the current function state.
 147  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 148  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 149  * passes through a NULL-check conditional. For the branch wherein the state is
 150  * changed to CONST_IMM, the verifier releases the reference.
 151  *
 152  * For each helper function that allocates a reference, such as
 153  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 154  * bpf_sk_release(). When a reference type passes into the release function,
 155  * the verifier also releases the reference. If any unchecked or unreleased
 156  * reference remains at the end of the program, the verifier rejects it.
 157  */
 158
 159 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
 160 struct bpf_verifier_stack_elem {
 161         /* verifer state is 'st'
 162          * before processing instruction 'insn_idx'
 163          * and after processing instruction 'prev_insn_idx'
 164          */
 165         struct bpf_verifier_state st;
 166         int insn_idx;
 167         int prev_insn_idx;
 168         struct bpf_verifier_stack_elem *next;
 169 };
 170
 171 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ    8192
 172 #define BPF_COMPLEXITY_LIMIT_STATES     64
 173
 174 #define BPF_MAP_PTR_UNPRIV      1UL
 175 #define BPF_MAP_PTR_POISON      ((void *)((0xeB9FUL << 1) +     \
 176                                           POISON_POINTER_DELTA))
 177 #define BPF_MAP_PTR(X)          ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
 178
 179 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 180 {
 181         return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
 182 }
 183
 184 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 185 {
 186         return aux->map_state & BPF_MAP_PTR_UNPRIV;
 187 }
 188
 189 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 190                               const struct bpf_map *map, bool unpriv)
 191 {
 192         BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
 193         unpriv |= bpf_map_ptr_unpriv(aux);
 194         aux->map_state = (unsigned long)map |
 195                          (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
 196 }
 197
 198 struct bpf_call_arg_meta {
 199         struct bpf_map *map_ptr;
 200         bool raw_mode;
 201         bool pkt_access;
 202         int regno;
 203         int access_size;
 204         u64 msize_max_value;
 205         int ref_obj_id;
 206         int func_id;
 207 };
 208
 209 static DEFINE_MUTEX(bpf_verifier_lock);
 210
 211 static const struct bpf_line_info *
 212 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
 213 {
 214         const struct bpf_line_info *linfo;
 215         const struct bpf_prog *prog;
 216         u32 i, nr_linfo;
 217
 218         prog = env->prog;
 219         nr_linfo = prog->aux->nr_linfo;
 220
 221         if (!nr_linfo || insn_off >= prog->len)
 222                 return NULL;
 223
 224         linfo = prog->aux->linfo;
 225         for (i = 1; i < nr_linfo; i++)
 226                 if (insn_off < linfo[i].insn_off)
 227                         break;
 228
 229         return &linfo[i - 1];
 230 }
 231
 232 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 233                        va_list args)
 234 {
 235         unsigned int n;
 236
 237         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 238
 239         WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 240                   "verifier log line truncated - local buffer too short\n");
 241
 242         n = min(log->len_total - log->len_used - 1, n);
 243         log->kbuf[n] = '\0';
 244
 245         if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 246                 log->len_used += n;
 247         else
 248                 log->ubuf = NULL;
 249 }
 250
 251 /* log_level controls verbosity level of eBPF verifier.
 252  * bpf_verifier_log_write() is used to dump the verification trace to the log,
 253  * so the user can figure out what's wrong with the program
 254  */
 255 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 256                                            const char *fmt, ...)
 257 {
 258         va_list args;
 259
 260         if (!bpf_verifier_log_needed(&env->log))
 261                 return;
 262
 263         va_start(args, fmt);
 264         bpf_verifier_vlog(&env->log, fmt, args);
 265         va_end(args);
 266 }
 267 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 268
 269 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 270 {
 271         struct bpf_verifier_env *env = private_data;
 272         va_list args;
 273
 274         if (!bpf_verifier_log_needed(&env->log))
 275                 return;
 276
 277         va_start(args, fmt);
 278         bpf_verifier_vlog(&env->log, fmt, args);
 279         va_end(args);
 280 }
 281
 282 static const char *ltrim(const char *s)
 283 {
 284         while (isspace(*s))
 285                 s++;
 286
 287         return s;
 288 }
 289
 290 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 291                                          u32 insn_off,
 292                                          const char *prefix_fmt, ...)
 293 {
 294         const struct bpf_line_info *linfo;
 295
 296         if (!bpf_verifier_log_needed(&env->log))
 297                 return;
 298
 299         linfo = find_linfo(env, insn_off);
 300         if (!linfo || linfo == env->prev_linfo)
 301                 return;
 302
 303         if (prefix_fmt) {
 304                 va_list args;
 305
 306                 va_start(args, prefix_fmt);
 307                 bpf_verifier_vlog(&env->log, prefix_fmt, args);
 308                 va_end(args);
 309         }
 310
 311         verbose(env, "%s\n",
 312                 ltrim(btf_name_by_offset(env->prog->aux->btf,
 313                                          linfo->line_off)));
 314
 315         env->prev_linfo = linfo;
 316 }
 317
 318 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 319 {
 320         return type == PTR_TO_PACKET ||
 321                type == PTR_TO_PACKET_META;
 322 }
 323
 324 static bool type_is_sk_pointer(enum bpf_reg_type type)
 325 {
 326         return type == PTR_TO_SOCKET ||
 327                 type == PTR_TO_SOCK_COMMON ||
 328                 type == PTR_TO_TCP_SOCK ||
 329                 type == PTR_TO_XDP_SOCK;
 330 }
 331
 332 static bool reg_type_may_be_null(enum bpf_reg_type type)
 333 {
 334         return type == PTR_TO_MAP_VALUE_OR_NULL ||
 335                type == PTR_TO_SOCKET_OR_NULL ||
 336                type == PTR_TO_SOCK_COMMON_OR_NULL ||
 337                type == PTR_TO_TCP_SOCK_OR_NULL;
 338 }
 339
 340 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 341 {
 342         return reg->type == PTR_TO_MAP_VALUE &&
 343                 map_value_has_spin_lock(reg->map_ptr);
 344 }
 345
 346 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 347 {
 348         return type == PTR_TO_SOCKET ||
 349                 type == PTR_TO_SOCKET_OR_NULL ||
 350                 type == PTR_TO_TCP_SOCK ||
 351                 type == PTR_TO_TCP_SOCK_OR_NULL;
 352 }
 353
 354 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 355 {
 356         return type == ARG_PTR_TO_SOCK_COMMON;
 357 }
 358
 359 /* Determine whether the function releases some resources allocated by another
 360  * function call. The first reference type argument will be assumed to be
 361  * released by release_reference().
 362  */
 363 static bool is_release_function(enum bpf_func_id func_id)
 364 {
 365         return func_id == BPF_FUNC_sk_release;
 366 }
 367
 368 static bool is_acquire_function(enum bpf_func_id func_id)
 369 {
 370         return func_id == BPF_FUNC_sk_lookup_tcp ||
 371                 func_id == BPF_FUNC_sk_lookup_udp ||
 372                 func_id == BPF_FUNC_skc_lookup_tcp;
 373 }
 374
 375 static bool is_ptr_cast_function(enum bpf_func_id func_id)
 376 {
 377         return func_id == BPF_FUNC_tcp_sock ||
 378                 func_id == BPF_FUNC_sk_fullsock;
 379 }
 380
 381 /* string representation of 'enum bpf_reg_type' */
 382 static const char * const reg_type_str[] = {
 383         [NOT_INIT]              = "?",
 384         [SCALAR_VALUE]          = "inv",
 385         [PTR_TO_CTX]            = "ctx",
 386         [CONST_PTR_TO_MAP]      = "map_ptr",
 387         [PTR_TO_MAP_VALUE]      = "map_value",
 388         [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
 389         [PTR_TO_STACK]          = "fp",
 390         [PTR_TO_PACKET]         = "pkt",
 391         [PTR_TO_PACKET_META]    = "pkt_meta",
 392         [PTR_TO_PACKET_END]     = "pkt_end",
 393         [PTR_TO_FLOW_KEYS]      = "flow_keys",
 394         [PTR_TO_SOCKET]         = "sock",
 395         [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 396         [PTR_TO_SOCK_COMMON]    = "sock_common",
 397         [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 398         [PTR_TO_TCP_SOCK]       = "tcp_sock",
 399         [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 400         [PTR_TO_TP_BUFFER]      = "tp_buffer",
 401         [PTR_TO_XDP_SOCK]       = "xdp_sock",
 402 };
 403
 404 static char slot_type_char[] = {
 405         [STACK_INVALID] = '?',
 406         [STACK_SPILL]   = 'r',
 407         [STACK_MISC]    = 'm',
 408         [STACK_ZERO]    = '0',
 409 };
 410
 411 static void print_liveness(struct bpf_verifier_env *env,
 412                            enum bpf_reg_liveness live)
 413 {
 414         if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
 415             verbose(env, "_");
 416         if (live & REG_LIVE_READ)
 417                 verbose(env, "r");
 418         if (live & REG_LIVE_WRITTEN)
 419                 verbose(env, "w");
 420         if (live & REG_LIVE_DONE)
 421                 verbose(env, "D");
 422 }
 423
 424 static struct bpf_func_state *func(struct bpf_verifier_env *env,
 425                                    const struct bpf_reg_state *reg)
 426 {
 427         struct bpf_verifier_state *cur = env->cur_state;
 428
 429         return cur->frame[reg->frameno];
 430 }
 431
 432 static void print_verifier_state(struct bpf_verifier_env *env,
 433                                  const struct bpf_func_state *state)
 434 {
 435         const struct bpf_reg_state *reg;
 436         enum bpf_reg_type t;
 437         int i;
 438
 439         if (state->frameno)
 440                 verbose(env, " frame%d:", state->frameno);
 441         for (i = 0; i < MAX_BPF_REG; i++) {
 442                 reg = &state->regs[i];
 443                 t = reg->type;
 444                 if (t == NOT_INIT)
 445                         continue;
 446                 verbose(env, " R%d", i);
 447                 print_liveness(env, reg->live);
 448                 verbose(env, "=%s", reg_type_str[t]);
 449                 if (t == SCALAR_VALUE && reg->precise)
 450                         verbose(env, "P");
 451                 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 452                     tnum_is_const(reg->var_off)) {
 453                         /* reg->off should be 0 for SCALAR_VALUE */
 454                         verbose(env, "%lld", reg->var_off.value + reg->off);
 455                 } else {
 456                         verbose(env, "(id=%d", reg->id);
 457                         if (reg_type_may_be_refcounted_or_null(t))
 458                                 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
 459                         if (t != SCALAR_VALUE)
 460                                 verbose(env, ",off=%d", reg->off);
 461                         if (type_is_pkt_pointer(t))
 462                                 verbose(env, ",r=%d", reg->range);
 463                         else if (t == CONST_PTR_TO_MAP ||
 464                                  t == PTR_TO_MAP_VALUE ||
 465                                  t == PTR_TO_MAP_VALUE_OR_NULL)
 466                                 verbose(env, ",ks=%d,vs=%d",
 467                                         reg->map_ptr->key_size,
 468                                         reg->map_ptr->value_size);
 469                         if (tnum_is_const(reg->var_off)) {
 470                                 /* Typically an immediate SCALAR_VALUE, but
 471                                  * could be a pointer whose offset is too big
 472                                  * for reg->off
 473                                  */
 474                                 verbose(env, ",imm=%llx", reg->var_off.value);
 475                         } else {
 476                                 if (reg->smin_value != reg->umin_value &&
 477                                     reg->smin_value != S64_MIN)
 478                                         verbose(env, ",smin_value=%lld",
 479                                                 (long long)reg->smin_value);
 480                                 if (reg->smax_value != reg->umax_value &&
 481                                     reg->smax_value != S64_MAX)
 482                                         verbose(env, ",smax_value=%lld",
 483                                                 (long long)reg->smax_value);
 484                                 if (reg->umin_value != 0)
 485                                         verbose(env, ",umin_value=%llu",
 486                                                 (unsigned long long)reg->umin_value);
 487                                 if (reg->umax_value != U64_MAX)
 488                                         verbose(env, ",umax_value=%llu",
 489                                                 (unsigned long long)reg->umax_value);
 490                                 if (!tnum_is_unknown(reg->var_off)) {
 491                                         char tn_buf[48];
 492
 493                                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 494                                         verbose(env, ",var_off=%s", tn_buf);
 495                                 }
 496                         }
 497                         verbose(env, ")");
 498                 }
 499         }
 500         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 501                 char types_buf[BPF_REG_SIZE + 1];
 502                 bool valid = false;
 503                 int j;
 504
 505                 for (j = 0; j < BPF_REG_SIZE; j++) {
 506                         if (state->stack[i].slot_type[j] != STACK_INVALID)
 507                                 valid = true;
 508                         types_buf[j] = slot_type_char[
 509                                         state->stack[i].slot_type[j]];
 510                 }
 511                 types_buf[BPF_REG_SIZE] = 0;
 512                 if (!valid)
 513                         continue;
 514                 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
 515                 print_liveness(env, state->stack[i].spilled_ptr.live);
 516                 if (state->stack[i].slot_type[0] == STACK_SPILL) {
 517                         reg = &state->stack[i].spilled_ptr;
 518                         t = reg->type;
 519                         verbose(env, "=%s", reg_type_str[t]);
 520                         if (t == SCALAR_VALUE && reg->precise)
 521                                 verbose(env, "P");
 522                         if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
 523                                 verbose(env, "%lld", reg->var_off.value + reg->off);
 524                 } else {
 525                         verbose(env, "=%s", types_buf);
 526                 }
 527         }
 528         if (state->acquired_refs && state->refs[0].id) {
 529                 verbose(env, " refs=%d", state->refs[0].id);
 530                 for (i = 1; i < state->acquired_refs; i++)
 531                         if (state->refs[i].id)
 532                                 verbose(env, ",%d", state->refs[i].id);
 533         }
 534         verbose(env, "\n");
 535 }
 536
 537 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)                         \
 538 static int copy_##NAME##_state(struct bpf_func_state *dst,              \
 539                                const struct bpf_func_state *src)        \
 540 {                                                                       \
 541         if (!src->FIELD)                                                \
 542                 return 0;                                               \
 543         if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {                    \
 544                 /* internal bug, make state invalid to reject the program */ \
 545                 memset(dst, 0, sizeof(*dst));                           \
 546                 return -EFAULT;                                         \
 547         }                                                               \
 548         memcpy(dst->FIELD, src->FIELD,                                  \
 549                sizeof(*src->FIELD) * (src->COUNT / SIZE));              \
 550         return 0;                                                       \
 551 }
 552 /* copy_reference_state() */
 553 COPY_STATE_FN(reference, acquired_refs, refs, 1)
 554 /* copy_stack_state() */
 555 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 556 #undef COPY_STATE_FN
 557
 558 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)                      \
 559 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
 560                                   bool copy_old)                        \
 561 {                                                                       \
 562         u32 old_size = state->COUNT;                                    \
 563         struct bpf_##NAME##_state *new_##FIELD;                         \
 564         int slot = size / SIZE;                                         \
 565                                                                         \
 566         if (size <= old_size || !size) {                                \
 567                 if (copy_old)                                           \
 568                         return 0;                                       \
 569                 state->COUNT = slot * SIZE;                             \
 570                 if (!size && old_size) {                                \
 571                         kfree(state->FIELD);                            \
 572                         state->FIELD = NULL;                            \
 573                 }                                                       \
 574                 return 0;                                               \
 575         }                                                               \
 576         new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
 577                                     GFP_KERNEL);                        \
 578         if (!new_##FIELD)                                               \
 579                 return -ENOMEM;                                         \
 580         if (copy_old) {                                                 \
 581                 if (state->FIELD)                                       \
 582                         memcpy(new_##FIELD, state->FIELD,               \
 583                                sizeof(*new_##FIELD) * (old_size / SIZE)); \
 584                 memset(new_##FIELD + old_size / SIZE, 0,                \
 585                        sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
 586         }                                                               \
 587         state->COUNT = slot * SIZE;                                     \
 588         kfree(state->FIELD);                                            \
 589         state->FIELD = new_##FIELD;                                     \
 590         return 0;                                                       \
 591 }
 592 /* realloc_reference_state() */
 593 REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
 594 /* realloc_stack_state() */
 595 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 596 #undef REALLOC_STATE_FN
 597
 598 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 599  * make it consume minimal amount of memory. check_stack_write() access from
 600  * the program calls into realloc_func_state() to grow the stack size.
 601  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 602  * which realloc_stack_state() copies over. It points to previous
 603  * bpf_verifier_state which is never reallocated.
 604  */
 605 static int realloc_func_state(struct bpf_func_state *state, int stack_size,
 606                               int refs_size, bool copy_old)
 607 {
 608         int err = realloc_reference_state(state, refs_size, copy_old);
 609         if (err)
 610                 return err;
 611         return realloc_stack_state(state, stack_size, copy_old);
 612 }
 613
 614 /* Acquire a pointer id from the env and update the state->refs to include
 615  * this new pointer reference.
 616  * On success, returns a valid pointer id to associate with the register
 617  * On failure, returns a negative errno.
 618  */
 619 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 620 {
 621         struct bpf_func_state *state = cur_func(env);
 622         int new_ofs = state->acquired_refs;
 623         int id, err;
 624
 625         err = realloc_reference_state(state, state->acquired_refs + 1, true);
 626         if (err)
 627                 return err;
 628         id = ++env->id_gen;
 629         state->refs[new_ofs].id = id;
 630         state->refs[new_ofs].insn_idx = insn_idx;
 631
 632         return id;
 633 }
 634
 635 /* release function corresponding to acquire_reference_state(). Idempotent. */
 636 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 637 {
 638         int i, last_idx;
 639
 640         last_idx = state->acquired_refs - 1;
 641         for (i = 0; i < state->acquired_refs; i++) {
 642                 if (state->refs[i].id == ptr_id) {
 643                         if (last_idx && i != last_idx)
 644                                 memcpy(&state->refs[i], &state->refs[last_idx],
 645                                        sizeof(*state->refs));
 646                         memset(&state->refs[last_idx], 0, sizeof(*state->refs));
 647                         state->acquired_refs--;
 648                         return 0;
 649                 }
 650         }
 651         return -EINVAL;
 652 }
 653
 654 static int transfer_reference_state(struct bpf_func_state *dst,
 655                                     struct bpf_func_state *src)
 656 {
 657         int err = realloc_reference_state(dst, src->acquired_refs, false);
 658         if (err)
 659                 return err;
 660         err = copy_reference_state(dst, src);
 661         if (err)
 662                 return err;
 663         return 0;
 664 }
 665
 666 static void free_func_state(struct bpf_func_state *state)
 667 {
 668         if (!state)
 669                 return;
 670         kfree(state->refs);
 671         kfree(state->stack);
 672         kfree(state);
 673 }
 674
 675 static void clear_jmp_history(struct bpf_verifier_state *state)
 676 {
 677         kfree(state->jmp_history);
 678         state->jmp_history = NULL;
 679         state->jmp_history_cnt = 0;
 680 }
 681
 682 static void free_verifier_state(struct bpf_verifier_state *state,
 683                                 bool free_self)
 684 {
 685         int i;
 686
 687         for (i = 0; i <= state->curframe; i++) {
 688                 free_func_state(state->frame[i]);
 689                 state->frame[i] = NULL;
 690         }
 691         clear_jmp_history(state);
 692         if (free_self)
 693                 kfree(state);
 694 }
 695
 696 /* copy verifier state from src to dst growing dst stack space
 697  * when necessary to accommodate larger src stack
 698  */
 699 static int copy_func_state(struct bpf_func_state *dst,
 700                            const struct bpf_func_state *src)
 701 {
 702         int err;
 703
 704         err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
 705                                  false);
 706         if (err)
 707                 return err;
 708         memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 709         err = copy_reference_state(dst, src);
 710         if (err)
 711                 return err;
 712         return copy_stack_state(dst, src);
 713 }
 714
 715 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 716                                const struct bpf_verifier_state *src)
 717 {
 718         struct bpf_func_state *dst;
 719         u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
 720         int i, err;
 721
 722         if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
 723                 kfree(dst_state->jmp_history);
 724                 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
 725                 if (!dst_state->jmp_history)
 726                         return -ENOMEM;
 727         }
 728         memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
 729         dst_state->jmp_history_cnt = src->jmp_history_cnt;
 730
 731         /* if dst has more stack frames then src frame, free them */
 732         for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 733                 free_func_state(dst_state->frame[i]);
 734                 dst_state->frame[i] = NULL;
 735         }
 736         dst_state->speculative = src->speculative;
 737         dst_state->curframe = src->curframe;
 738         dst_state->active_spin_lock = src->active_spin_lock;
 739         dst_state->branches = src->branches;
 740         dst_state->parent = src->parent;
 741         dst_state->first_insn_idx = src->first_insn_idx;
 742         dst_state->last_insn_idx = src->last_insn_idx;
 743         for (i = 0; i <= src->curframe; i++) {
 744                 dst = dst_state->frame[i];
 745                 if (!dst) {
 746                         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 747                         if (!dst)
 748                                 return -ENOMEM;
 749                         dst_state->frame[i] = dst;
 750                 }
 751                 err = copy_func_state(dst, src->frame[i]);
 752                 if (err)
 753                         return err;
 754         }
 755         return 0;
 756 }
 757
 758 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 759 {
 760         while (st) {
 761                 u32 br = --st->branches;
 762
 763                 /* WARN_ON(br > 1) technically makes sense here,
 764                  * but see comment in push_stack(), hence:
 765                  */
 766                 WARN_ONCE((int)br < 0,
 767                           "BUG update_branch_counts:branches_to_explore=%d\n",
 768                           br);
 769                 if (br)
 770                         break;
 771                 st = st->parent;
 772         }
 773 }
 774
 775 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 776                      int *insn_idx)
 777 {
 778         struct bpf_verifier_state *cur = env->cur_state;
 779         struct bpf_verifier_stack_elem *elem, *head = env->head;
 780         int err;
 781
 782         if (env->head == NULL)
 783                 return -ENOENT;
 784
 785         if (cur) {
 786                 err = copy_verifier_state(cur, &head->st);
 787                 if (err)
 788                         return err;
 789         }
 790         if (insn_idx)
 791                 *insn_idx = head->insn_idx;
 792         if (prev_insn_idx)
 793                 *prev_insn_idx = head->prev_insn_idx;
 794         elem = head->next;
 795         free_verifier_state(&head->st, false);
 796         kfree(head);
 797         env->head = elem;
 798         env->stack_size--;
 799         return 0;
 800 }
 801
 802 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 803                                              int insn_idx, int prev_insn_idx,
 804                                              bool speculative)
 805 {
 806         struct bpf_verifier_state *cur = env->cur_state;
 807         struct bpf_verifier_stack_elem *elem;
 808         int err;
 809
 810         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 811         if (!elem)
 812                 goto err;
 813
 814         elem->insn_idx = insn_idx;
 815         elem->prev_insn_idx = prev_insn_idx;
 816         elem->next = env->head;
 817         env->head = elem;
 818         env->stack_size++;
 819         err = copy_verifier_state(&elem->st, cur);
 820         if (err)
 821                 goto err;
 822         elem->st.speculative |= speculative;
 823         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
 824                 verbose(env, "The sequence of %d jumps is too complex.\n",
 825                         env->stack_size);
 826                 goto err;
 827         }
 828         if (elem->st.parent) {
 829                 ++elem->st.parent->branches;
 830                 /* WARN_ON(branches > 2) technically makes sense here,
 831                  * but
 832                  * 1. speculative states will bump 'branches' for non-branch
 833                  * instructions
 834                  * 2. is_state_visited() heuristics may decide not to create
 835                  * a new state for a sequence of branches and all such current
 836                  * and cloned states will be pointing to a single parent state
 837                  * which might have large 'branches' count.
 838                  */
 839         }
 840         return &elem->st;
 841 err:
 842         free_verifier_state(env->cur_state, true);
 843         env->cur_state = NULL;
 844         /* pop all elements and return */
 845         while (!pop_stack(env, NULL, NULL));
 846         return NULL;
 847 }
 848
 849 #define CALLER_SAVED_REGS 6
 850 static const int caller_saved[CALLER_SAVED_REGS] = {
 851         BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 852 };
 853
 854 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
 855                                 struct bpf_reg_state *reg);
 856
 857 /* Mark the unknown part of a register (variable offset or scalar value) as
 858  * known to have the value @imm.
 859  */
 860 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 861 {
 862         /* Clear id, off, and union(map_ptr, range) */
 863         memset(((u8 *)reg) + sizeof(reg->type), 0,
 864                offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
 865         reg->var_off = tnum_const(imm);
 866         reg->smin_value = (s64)imm;
 867         reg->smax_value = (s64)imm;
 868         reg->umin_value = imm;
 869         reg->umax_value = imm;
 870 }
 871
 872 /* Mark the 'variable offset' part of a register as zero.  This should be
 873  * used only on registers holding a pointer type.
 874  */
 875 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
 876 {
 877         __mark_reg_known(reg, 0);
 878 }
 879
 880 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
 881 {
 882         __mark_reg_known(reg, 0);
 883         reg->type = SCALAR_VALUE;
 884 }
 885
 886 static void mark_reg_known_zero(struct bpf_verifier_env *env,
 887                                 struct bpf_reg_state *regs, u32 regno)
 888 {
 889         if (WARN_ON(regno >= MAX_BPF_REG)) {
 890                 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
 891                 /* Something bad happened, let's kill all regs */
 892                 for (regno = 0; regno < MAX_BPF_REG; regno++)
 893                         __mark_reg_not_init(env, regs + regno);
 894                 return;
 895         }
 896         __mark_reg_known_zero(regs + regno);
 897 }
 898
 899 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
 900 {
 901         return type_is_pkt_pointer(reg->type);
 902 }
 903
 904 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
 905 {
 906         return reg_is_pkt_pointer(reg) ||
 907                reg->type == PTR_TO_PACKET_END;
 908 }
 909
 910 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
 911 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
 912                                     enum bpf_reg_type which)
 913 {
 914         /* The register can already have a range from prior markings.
 915          * This is fine as long as it hasn't been advanced from its
 916          * origin.
 917          */
 918         return reg->type == which &&
 919                reg->id == 0 &&
 920                reg->off == 0 &&
 921                tnum_equals_const(reg->var_off, 0);
 922 }
 923
 924 /* Attempts to improve min/max values based on var_off information */
 925 static void __update_reg_bounds(struct bpf_reg_state *reg)
 926 {
 927         /* min signed is max(sign bit) | min(other bits) */
 928         reg->smin_value = max_t(s64, reg->smin_value,
 929                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
 930         /* max signed is min(sign bit) | max(other bits) */
 931         reg->smax_value = min_t(s64, reg->smax_value,
 932                                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
 933         reg->umin_value = max(reg->umin_value, reg->var_off.value);
 934         reg->umax_value = min(reg->umax_value,
 935                               reg->var_off.value | reg->var_off.mask);
 936 }
 937
 938 /* Uses signed min/max values to inform unsigned, and vice-versa */
 939 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
 940 {
 941         /* Learn sign from signed bounds.
 942          * If we cannot cross the sign boundary, then signed and unsigned bounds
 943          * are the same, so combine.  This works even in the negative case, e.g.
 944          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
 945          */
 946         if (reg->smin_value >= 0 || reg->smax_value < 0) {
 947                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 948                                                           reg->umin_value);
 949                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 950                                                           reg->umax_value);
 951                 return;
 952         }
 953         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
 954          * boundary, so we must be careful.
 955          */
 956         if ((s64)reg->umax_value >= 0) {
 957                 /* Positive.  We can't learn anything from the smin, but smax
 958                  * is positive, hence safe.
 959                  */
 960                 reg->smin_value = reg->umin_value;
 961                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 962                                                           reg->umax_value);
 963         } else if ((s64)reg->umin_value < 0) {
 964                 /* Negative.  We can't learn anything from the smax, but smin
 965                  * is negative, hence safe.
 966                  */
 967                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 968                                                           reg->umin_value);
 969                 reg->smax_value = reg->umax_value;
 970         }
 971 }
 972
 973 /* Attempts to improve var_off based on unsigned min/max information */
 974 static void __reg_bound_offset(struct bpf_reg_state *reg)
 975 {
 976         reg->var_off = tnum_intersect(reg->var_off,
 977                                       tnum_range(reg->umin_value,
 978                                                  reg->umax_value));
 979 }
 980
 981 /* Reset the min/max bounds of a register */
 982 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 983 {
 984         reg->smin_value = S64_MIN;
 985         reg->smax_value = S64_MAX;
 986         reg->umin_value = 0;
 987         reg->umax_value = U64_MAX;
 988 }
 989
 990 /* Mark a register as having a completely unknown (scalar) value. */
 991 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
 992                                struct bpf_reg_state *reg)
 993 {
 994         /*
 995          * Clear type, id, off, and union(map_ptr, range) and
 996          * padding between 'type' and union
 997          */
 998         memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
 999         reg->type = SCALAR_VALUE;
1000         reg->var_off = tnum_unknown;
1001         reg->frameno = 0;
1002         reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
1003                        true : false;
1004         __mark_reg_unbounded(reg);
1005 }
1006
1007 static void mark_reg_unknown(struct bpf_verifier_env *env,
1008                              struct bpf_reg_state *regs, u32 regno)
1009 {
1010         if (WARN_ON(regno >= MAX_BPF_REG)) {
1011                 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1012                 /* Something bad happened, let's kill all regs except FP */
1013                 for (regno = 0; regno < BPF_REG_FP; regno++)
1014                         __mark_reg_not_init(env, regs + regno);
1015                 return;
1016         }
1017         __mark_reg_unknown(env, regs + regno);
1018 }
1019
1020 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1021                                 struct bpf_reg_state *reg)
1022 {
1023         __mark_reg_unknown(env, reg);
1024         reg->type = NOT_INIT;
1025 }
1026
1027 static void mark_reg_not_init(struct bpf_verifier_env *env,
1028                               struct bpf_reg_state *regs, u32 regno)
1029 {
1030         if (WARN_ON(regno >= MAX_BPF_REG)) {
1031                 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1032                 /* Something bad happened, let's kill all regs except FP */
1033                 for (regno = 0; regno < BPF_REG_FP; regno++)
1034                         __mark_reg_not_init(env, regs + regno);
1035                 return;
1036         }
1037         __mark_reg_not_init(env, regs + regno);
1038 }
1039
1040 #define DEF_NOT_SUBREG  (0)
1041 static void init_reg_state(struct bpf_verifier_env *env,
1042                            struct bpf_func_state *state)
1043 {
1044         struct bpf_reg_state *regs = state->regs;
1045         int i;
1046
1047         for (i = 0; i < MAX_BPF_REG; i++) {
1048                 mark_reg_not_init(env, regs, i);
1049                 regs[i].live = REG_LIVE_NONE;
1050                 regs[i].parent = NULL;
1051                 regs[i].subreg_def = DEF_NOT_SUBREG;
1052         }
1053
1054         /* frame pointer */
1055         regs[BPF_REG_FP].type = PTR_TO_STACK;
1056         mark_reg_known_zero(env, regs, BPF_REG_FP);
1057         regs[BPF_REG_FP].frameno = state->frameno;
1058
1059         /* 1st arg to a function */
1060         regs[BPF_REG_1].type = PTR_TO_CTX;
1061         mark_reg_known_zero(env, regs, BPF_REG_1);
1062 }
1063
1064 #define BPF_MAIN_FUNC (-1)
1065 static void init_func_state(struct bpf_verifier_env *env,
1066                             struct bpf_func_state *state,
1067                             int callsite, int frameno, int subprogno)
1068 {
1069         state->callsite = callsite;
1070         state->frameno = frameno;
1071         state->subprogno = subprogno;
1072         init_reg_state(env, state);
1073 }
1074
1075 enum reg_arg_type {
1076         SRC_OP,         /* register is used as source operand */
1077         DST_OP,         /* register is used as destination operand */
1078         DST_OP_NO_MARK  /* same as above, check only, don't mark */
1079 };
1080
1081 static int cmp_subprogs(const void *a, const void *b)
1082 {
1083         return ((struct bpf_subprog_info *)a)->start -
1084                ((struct bpf_subprog_info *)b)->start;
1085 }
1086
1087 static int find_subprog(struct bpf_verifier_env *env, int off)
1088 {
1089         struct bpf_subprog_info *p;
1090
1091         p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1092                     sizeof(env->subprog_info[0]), cmp_subprogs);
1093         if (!p)
1094                 return -ENOENT;
1095         return p - env->subprog_info;
1096
1097 }
1098
1099 static int add_subprog(struct bpf_verifier_env *env, int off)
1100 {
1101         int insn_cnt = env->prog->len;
1102         int ret;
1103
1104         if (off >= insn_cnt || off < 0) {
1105                 verbose(env, "call to invalid destination\n");
1106                 return -EINVAL;
1107         }
1108         ret = find_subprog(env, off);
1109         if (ret >= 0)
1110                 return 0;
1111         if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1112                 verbose(env, "too many subprograms\n");
1113                 return -E2BIG;
1114         }
1115         env->subprog_info[env->subprog_cnt++].start = off;
1116         sort(env->subprog_info, env->subprog_cnt,
1117              sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1118         return 0;
1119 }
1120
1121 static int check_subprogs(struct bpf_verifier_env *env)
1122 {
1123         int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1124         struct bpf_subprog_info *subprog = env->subprog_info;
1125         struct bpf_insn *insn = env->prog->insnsi;
1126         int insn_cnt = env->prog->len;
1127
1128         /* Add entry function. */
1129         ret = add_subprog(env, 0);
1130         if (ret < 0)
1131                 return ret;
1132
1133         /* determine subprog starts. The end is one before the next starts */
1134         for (i = 0; i < insn_cnt; i++) {
1135                 if (insn[i].code != (BPF_JMP | BPF_CALL))
1136                         continue;
1137                 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1138                         continue;
1139                 if (!env->allow_ptr_leaks) {
1140                         verbose(env, "function calls to other bpf functions are allowed for root only\n");
1141                         return -EPERM;
1142                 }
1143                 ret = add_subprog(env, i + insn[i].imm + 1);
1144                 if (ret < 0)
1145                         return ret;
1146         }
1147
1148         /* Add a fake 'exit' subprog which could simplify subprog iteration
1149          * logic. 'subprog_cnt' should not be increased.
1150          */
1151         subprog[env->subprog_cnt].start = insn_cnt;
1152
1153         if (env->log.level & BPF_LOG_LEVEL2)
1154                 for (i = 0; i < env->subprog_cnt; i++)
1155                         verbose(env, "func#%d @%d\n", i, subprog[i].start);
1156
1157         /* now check that all jumps are within the same subprog */
1158         subprog_start = subprog[cur_subprog].start;
1159         subprog_end = subprog[cur_subprog + 1].start;
1160         for (i = 0; i < insn_cnt; i++) {
1161                 u8 code = insn[i].code;
1162
1163                 if (code == (BPF_JMP | BPF_CALL) &&
1164                     insn[i].imm == BPF_FUNC_tail_call &&
1165                     insn[i].src_reg != BPF_PSEUDO_CALL)
1166                         subprog[cur_subprog].has_tail_call = true;
1167                 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1168                         goto next;
1169                 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1170                         goto next;
1171                 off = i + insn[i].off + 1;
1172                 if (off < subprog_start || off >= subprog_end) {
1173                         verbose(env, "jump out of range from insn %d to %d\n", i, off);
1174                         return -EINVAL;
1175                 }
1176 next:
1177                 if (i == subprog_end - 1) {
1178                         /* to avoid fall-through from one subprog into another
1179                          * the last insn of the subprog should be either exit
1180                          * or unconditional jump back
1181                          */
1182                         if (code != (BPF_JMP | BPF_EXIT) &&
1183                             code != (BPF_JMP | BPF_JA)) {
1184                                 verbose(env, "last insn is not an exit or jmp\n");
1185                                 return -EINVAL;
1186                         }
1187                         subprog_start = subprog_end;
1188                         cur_subprog++;
1189                         if (cur_subprog < env->subprog_cnt)
1190                                 subprog_end = subprog[cur_subprog + 1].start;
1191                 }
1192         }
1193         return 0;
1194 }
1195
1196 /* Parentage chain of this register (or stack slot) should take care of all
1197  * issues like callee-saved registers, stack slot allocation time, etc.
1198  */
1199 static int mark_reg_read(struct bpf_verifier_env *env,
1200                          const struct bpf_reg_state *state,
1201                          struct bpf_reg_state *parent, u8 flag)
1202 {
1203         bool writes = parent == state->parent; /* Observe write marks */
1204         int cnt = 0;
1205
1206         while (parent) {
1207                 /* if read wasn't screened by an earlier write ... */
1208                 if (writes && state->live & REG_LIVE_WRITTEN)
1209                         break;
1210                 if (parent->live & REG_LIVE_DONE) {
1211                         verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1212                                 reg_type_str[parent->type],
1213                                 parent->var_off.value, parent->off);
1214                         return -EFAULT;
1215                 }
1216                 /* The first condition is more likely to be true than the
1217                  * second, checked it first.
1218                  */
1219                 if ((parent->live & REG_LIVE_READ) == flag ||
1220                     parent->live & REG_LIVE_READ64)
1221                         /* The parentage chain never changes and
1222                          * this parent was already marked as LIVE_READ.
1223                          * There is no need to keep walking the chain again and
1224                          * keep re-marking all parents as LIVE_READ.
1225                          * This case happens when the same register is read
1226                          * multiple times without writes into it in-between.
1227                          * Also, if parent has the stronger REG_LIVE_READ64 set,
1228                          * then no need to set the weak REG_LIVE_READ32.
1229                          */
1230                         break;
1231                 /* ... then we depend on parent's value */
1232                 parent->live |= flag;
1233                 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1234                 if (flag == REG_LIVE_READ64)
1235                         parent->live &= ~REG_LIVE_READ32;
1236                 state = parent;
1237                 parent = state->parent;
1238                 writes = true;
1239                 cnt++;
1240         }
1241
1242         if (env->longest_mark_read_walk < cnt)
1243                 env->longest_mark_read_walk = cnt;
1244         return 0;
1245 }
1246
1247 /* This function is supposed to be used by the following 32-bit optimization
1248  * code only. It returns TRUE if the source or destination register operates
1249  * on 64-bit, otherwise return FALSE.
1250  */
1251 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
1252                      u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
1253 {
1254         u8 code, class, op;
1255
1256         code = insn->code;
1257         class = BPF_CLASS(code);
1258         op = BPF_OP(code);
1259         if (class == BPF_JMP) {
1260                 /* BPF_EXIT for "main" will reach here. Return TRUE
1261                  * conservatively.
1262                  */
1263                 if (op == BPF_EXIT)
1264                         return true;
1265                 if (op == BPF_CALL) {
1266                         /* BPF to BPF call will reach here because of marking
1267                          * caller saved clobber with DST_OP_NO_MARK for which we
1268                          * don't care the register def because they are anyway
1269                          * marked as NOT_INIT already.
1270                          */
1271                         if (insn->src_reg == BPF_PSEUDO_CALL)
1272                                 return false;
1273                         /* Helper call will reach here because of arg type
1274                          * check, conservatively return TRUE.
1275                          */
1276                         if (t == SRC_OP)
1277                                 return true;
1278
1279                         return false;
1280                 }
1281         }
1282
1283         if (class == BPF_ALU64 || class == BPF_JMP ||
1284             /* BPF_END always use BPF_ALU class. */
1285             (class == BPF_ALU && op == BPF_END && insn->imm == 64))
1286                 return true;
1287
1288         if (class == BPF_ALU || class == BPF_JMP32)
1289                 return false;
1290
1291         if (class == BPF_LDX) {
1292                 if (t != SRC_OP)
1293                         return BPF_SIZE(code) == BPF_DW;
1294                 /* LDX source must be ptr. */
1295                 return true;
1296         }
1297
1298         if (class == BPF_STX) {
1299                 if (reg->type != SCALAR_VALUE)
1300                         return true;
1301                 return BPF_SIZE(code) == BPF_DW;
1302         }
1303
1304         if (class == BPF_LD) {
1305                 u8 mode = BPF_MODE(code);
1306
1307                 /* LD_IMM64 */
1308                 if (mode == BPF_IMM)
1309                         return true;
1310
1311                 /* Both LD_IND and LD_ABS return 32-bit data. */
1312                 if (t != SRC_OP)
1313                         return  false;
1314
1315                 /* Implicit ctx ptr. */
1316                 if (regno == BPF_REG_6)
1317                         return true;
1318
1319                 /* Explicit source could be any width. */
1320                 return true;
1321         }
1322
1323         if (class == BPF_ST)
1324                 /* The only source register for BPF_ST is a ptr. */
1325                 return true;
1326
1327         /* Conservatively return true at default. */
1328         return true;
1329 }
1330
1331 /* Return TRUE if INSN doesn't have explicit value define. */
1332 static bool insn_no_def(struct bpf_insn *insn)
1333 {
1334         u8 class = BPF_CLASS(insn->code);
1335
1336         return (class == BPF_JMP || class == BPF_JMP32 ||
1337                 class == BPF_STX || class == BPF_ST);
1338 }
1339
1340 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
1341 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1342 {
1343         if (insn_no_def(insn))
1344                 return false;
1345
1346         return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1347 }
1348
1349 static void mark_insn_zext(struct bpf_verifier_env *env,
1350                            struct bpf_reg_state *reg)
1351 {
1352         s32 def_idx = reg->subreg_def;
1353
1354         if (def_idx == DEF_NOT_SUBREG)
1355                 return;
1356
1357         env->insn_aux_data[def_idx - 1].zext_dst = true;
1358         /* The dst will be zero extended, so won't be sub-register anymore. */
1359         reg->subreg_def = DEF_NOT_SUBREG;
1360 }
1361
1362 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1363                          enum reg_arg_type t)
1364 {
1365         struct bpf_verifier_state *vstate = env->cur_state;
1366         struct bpf_func_state *state = vstate->frame[vstate->curframe];
1367         struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1368         struct bpf_reg_state *reg, *regs = state->regs;
1369         bool rw64;
1370
1371         if (regno >= MAX_BPF_REG) {
1372                 verbose(env, "R%d is invalid\n", regno);
1373                 return -EINVAL;
1374         }
1375
1376         reg = &regs[regno];
1377         rw64 = is_reg64(env, insn, regno, reg, t);
1378         if (t == SRC_OP) {
1379                 /* check whether register used as source operand can be read */
1380                 if (reg->type == NOT_INIT) {
1381                         verbose(env, "R%d !read_ok\n", regno);
1382                         return -EACCES;
1383                 }
1384                 /* We don't need to worry about FP liveness because it's read-only */
1385                 if (regno == BPF_REG_FP)
1386                         return 0;
1387
1388                 if (rw64)
1389                         mark_insn_zext(env, reg);
1390
1391                 return mark_reg_read(env, reg, reg->parent,
1392                                      rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1393         } else {
1394                 /* check whether register used as dest operand can be written to */
1395                 if (regno == BPF_REG_FP) {
1396                         verbose(env, "frame pointer is read only\n");
1397                         return -EACCES;
1398                 }
1399                 reg->live |= REG_LIVE_WRITTEN;
1400                 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1401                 if (t == DST_OP)
1402                         mark_reg_unknown(env, regs, regno);
1403         }
1404         return 0;
1405 }
1406
1407 /* for any branch, call, exit record the history of jmps in the given state */
1408 static int push_jmp_history(struct bpf_verifier_env *env,
1409                             struct bpf_verifier_state *cur)
1410 {
1411         u32 cnt = cur->jmp_history_cnt;
1412         struct bpf_idx_pair *p;
1413
1414         cnt++;
1415         p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1416         if (!p)
1417                 return -ENOMEM;
1418         p[cnt - 1].idx = env->insn_idx;
1419         p[cnt - 1].prev_idx = env->prev_insn_idx;
1420         cur->jmp_history = p;
1421         cur->jmp_history_cnt = cnt;
1422         return 0;
1423 }
1424
1425 /* Backtrack one insn at a time. If idx is not at the top of recorded
1426  * history then previous instruction came from straight line execution.
1427  */
1428 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1429                              u32 *history)
1430 {
1431         u32 cnt = *history;
1432
1433         if (cnt && st->jmp_history[cnt - 1].idx == i) {
1434                 i = st->jmp_history[cnt - 1].prev_idx;
1435                 (*history)--;
1436         } else {
1437                 i--;
1438         }
1439         return i;
1440 }
1441
1442 /* For given verifier state backtrack_insn() is called from the last insn to
1443  * the first insn. Its purpose is to compute a bitmask of registers and
1444  * stack slots that needs precision in the parent verifier state.
1445  */
1446 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1447                           u32 *reg_mask, u64 *stack_mask)
1448 {
1449         const struct bpf_insn_cbs cbs = {
1450                 .cb_print       = verbose,
1451                 .private_data   = env,
1452         };
1453         struct bpf_insn *insn = env->prog->insnsi + idx;
1454         u8 class = BPF_CLASS(insn->code);
1455         u8 opcode = BPF_OP(insn->code);
1456         u8 mode = BPF_MODE(insn->code);
1457         u32 dreg = 1u << insn->dst_reg;
1458         u32 sreg = 1u << insn->src_reg;
1459         u32 spi;
1460
1461         if (insn->code == 0)
1462                 return 0;
1463         if (env->log.level & BPF_LOG_LEVEL) {
1464                 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1465                 verbose(env, "%d: ", idx);
1466                 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1467         }
1468
1469         if (class == BPF_ALU || class == BPF_ALU64) {
1470                 if (!(*reg_mask & dreg))
1471                         return 0;
1472                 if (opcode == BPF_MOV) {
1473                         if (BPF_SRC(insn->code) == BPF_X) {
1474                                 /* dreg = sreg
1475                                  * dreg needs precision after this insn
1476                                  * sreg needs precision before this insn
1477                                  */
1478                                 *reg_mask &= ~dreg;
1479                                 *reg_mask |= sreg;
1480                         } else {
1481                                 /* dreg = K
1482                                  * dreg needs precision after this insn.
1483                                  * Corresponding register is already marked
1484                                  * as precise=true in this verifier state.
1485                                  * No further markings in parent are necessary
1486                                  */
1487                                 *reg_mask &= ~dreg;
1488                         }
1489                 } else {
1490                         if (BPF_SRC(insn->code) == BPF_X) {
1491                                 /* dreg += sreg
1492                                  * both dreg and sreg need precision
1493                                  * before this insn
1494                                  */
1495                                 *reg_mask |= sreg;
1496                         } /* else dreg += K
1497                            * dreg still needs precision before this insn
1498                            */
1499                 }
1500         } else if (class == BPF_LDX) {
1501                 if (!(*reg_mask & dreg))
1502                         return 0;
1503                 *reg_mask &= ~dreg;
1504
1505                 /* scalars can only be spilled into stack w/o losing precision.
1506                  * Load from any other memory can be zero extended.
1507                  * The desire to keep that precision is already indicated
1508                  * by 'precise' mark in corresponding register of this state.
1509                  * No further tracking necessary.
1510                  */
1511                 if (insn->src_reg != BPF_REG_FP)
1512                         return 0;
1513                 if (BPF_SIZE(insn->code) != BPF_DW)
1514                         return 0;
1515
1516                 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1517                  * that [fp - off] slot contains scalar that needs to be
1518                  * tracked with precision
1519                  */
1520                 spi = (-insn->off - 1) / BPF_REG_SIZE;
1521                 if (spi >= 64) {
1522                         verbose(env, "BUG spi %d\n", spi);
1523                         WARN_ONCE(1, "verifier backtracking bug");
1524                         return -EFAULT;
1525                 }
1526                 *stack_mask |= 1ull << spi;
1527         } else if (class == BPF_STX || class == BPF_ST) {
1528                 if (*reg_mask & dreg)
1529                         /* stx & st shouldn't be using _scalar_ dst_reg
1530                          * to access memory. It means backtracking
1531                          * encountered a case of pointer subtraction.
1532                          */
1533                         return -ENOTSUPP;
1534                 /* scalars can only be spilled into stack */
1535                 if (insn->dst_reg != BPF_REG_FP)
1536                         return 0;
1537                 if (BPF_SIZE(insn->code) != BPF_DW)
1538                         return 0;
1539                 spi = (-insn->off - 1) / BPF_REG_SIZE;
1540                 if (spi >= 64) {
1541                         verbose(env, "BUG spi %d\n", spi);
1542                         WARN_ONCE(1, "verifier backtracking bug");
1543                         return -EFAULT;
1544                 }
1545                 if (!(*stack_mask & (1ull << spi)))
1546                         return 0;
1547                 *stack_mask &= ~(1ull << spi);
1548                 if (class == BPF_STX)
1549                         *reg_mask |= sreg;
1550         } else if (class == BPF_JMP || class == BPF_JMP32) {
1551                 if (opcode == BPF_CALL) {
1552                         if (insn->src_reg == BPF_PSEUDO_CALL)
1553                                 return -ENOTSUPP;
1554                         /* regular helper call sets R0 */
1555                         *reg_mask &= ~1;
1556                         if (*reg_mask & 0x3f) {
1557                                 /* if backtracing was looking for registers R1-R5
1558                                  * they should have been found already.
1559                                  */
1560                                 verbose(env, "BUG regs %x\n", *reg_mask);
1561                                 WARN_ONCE(1, "verifier backtracking bug");
1562                                 return -EFAULT;
1563                         }
1564                 } else if (opcode == BPF_EXIT) {
1565                         return -ENOTSUPP;
1566                 } else if (BPF_SRC(insn->code) == BPF_X) {
1567                         if (!(*reg_mask & (dreg | sreg)))
1568                                 return 0;
1569                         /* dreg <cond> sreg
1570                          * Both dreg and sreg need precision before
1571                          * this insn. If only sreg was marked precise
1572                          * before it would be equally necessary to
1573                          * propagate it to dreg.
1574                          */
1575                         *reg_mask |= (sreg | dreg);
1576                          /* else dreg <cond> K
1577                           * Only dreg still needs precision before
1578                           * this insn, so for the K-based conditional
1579                           * there is nothing new to be marked.
1580                           */
1581                 }
1582         } else if (class == BPF_LD) {
1583                 if (!(*reg_mask & dreg))
1584                         return 0;
1585                 *reg_mask &= ~dreg;
1586                 /* It's ld_imm64 or ld_abs or ld_ind.
1587                  * For ld_imm64 no further tracking of precision
1588                  * into parent is necessary
1589                  */
1590                 if (mode == BPF_IND || mode == BPF_ABS)
1591                         /* to be analyzed */
1592                         return -ENOTSUPP;
1593         }
1594         return 0;
1595 }
1596
1597 /* the scalar precision tracking algorithm:
1598  * . at the start all registers have precise=false.
1599  * . scalar ranges are tracked as normal through alu and jmp insns.
1600  * . once precise value of the scalar register is used in:
1601  *   .  ptr + scalar alu
1602  *   . if (scalar cond K|scalar)
1603  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
1604  *   backtrack through the verifier states and mark all registers and
1605  *   stack slots with spilled constants that these scalar regisers
1606  *   should be precise.
1607  * . during state pruning two registers (or spilled stack slots)
1608  *   are equivalent if both are not precise.
1609  *
1610  * Note the verifier cannot simply walk register parentage chain,
1611  * since many different registers and stack slots could have been
1612  * used to compute single precise scalar.
1613  *
1614  * The approach of starting with precise=true for all registers and then
1615  * backtrack to mark a register as not precise when the verifier detects
1616  * that program doesn't care about specific value (e.g., when helper
1617  * takes register as ARG_ANYTHING parameter) is not safe.
1618  *
1619  * It's ok to walk single parentage chain of the verifier states.
1620  * It's possible that this backtracking will go all the way till 1st insn.
1621  * All other branches will be explored for needing precision later.
1622  *
1623  * The backtracking needs to deal with cases like:
1624  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1625  * r9 -= r8
1626  * r5 = r9
1627  * if r5 > 0x79f goto pc+7
1628  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1629  * r5 += 1
1630  * ...
1631  * call bpf_perf_event_output#25
1632  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1633  *
1634  * and this case:
1635  * r6 = 1
1636  * call foo // uses callee's r6 inside to compute r0
1637  * r0 += r6
1638  * if r0 == 0 goto
1639  *
1640  * to track above reg_mask/stack_mask needs to be independent for each frame.
1641  *
1642  * Also if parent's curframe > frame where backtracking started,
1643  * the verifier need to mark registers in both frames, otherwise callees
1644  * may incorrectly prune callers. This is similar to
1645  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1646  *
1647  * For now backtracking falls back into conservative marking.
1648  */
1649 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1650                                      struct bpf_verifier_state *st)
1651 {
1652         struct bpf_func_state *func;
1653         struct bpf_reg_state *reg;
1654         int i, j;
1655
1656         /* big hammer: mark all scalars precise in this path.
1657          * pop_stack may still get !precise scalars.
1658          */
1659         for (; st; st = st->parent)
1660                 for (i = 0; i <= st->curframe; i++) {
1661                         func = st->frame[i];
1662                         for (j = 0; j < BPF_REG_FP; j++) {
1663                                 reg = &func->regs[j];
1664                                 if (reg->type != SCALAR_VALUE)
1665                                         continue;
1666                                 reg->precise = true;
1667                         }
1668                         for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
1669                                 if (func->stack[j].slot_type[0] != STACK_SPILL)
1670                                         continue;
1671                                 reg = &func->stack[j].spilled_ptr;
1672                                 if (reg->type != SCALAR_VALUE)
1673                                         continue;
1674                                 reg->precise = true;
1675                         }
1676                 }
1677 }
1678
1679 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
1680                                   int spi)
1681 {
1682         struct bpf_verifier_state *st = env->cur_state;
1683         int first_idx = st->first_insn_idx;
1684         int last_idx = env->insn_idx;
1685         struct bpf_func_state *func;
1686         struct bpf_reg_state *reg;
1687         u32 reg_mask = regno >= 0 ? 1u << regno : 0;
1688         u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
1689         bool skip_first = true;
1690         bool new_marks = false;
1691         int i, err;
1692
1693         if (!env->allow_ptr_leaks)
1694                 /* backtracking is root only for now */
1695                 return 0;
1696
1697         func = st->frame[st->curframe];
1698         if (regno >= 0) {
1699                 reg = &func->regs[regno];
1700                 if (reg->type != SCALAR_VALUE) {
1701                         WARN_ONCE(1, "backtracing misuse");
1702                         return -EFAULT;
1703                 }
1704                 if (!reg->precise)
1705                         new_marks = true;
1706                 else
1707                         reg_mask = 0;
1708                 reg->precise = true;
1709         }
1710
1711         while (spi >= 0) {
1712                 if (func->stack[spi].slot_type[0] != STACK_SPILL) {
1713                         stack_mask = 0;
1714                         break;
1715                 }
1716                 reg = &func->stack[spi].spilled_ptr;
1717                 if (reg->type != SCALAR_VALUE) {
1718                         stack_mask = 0;
1719                         break;
1720                 }
1721                 if (!reg->precise)
1722                         new_marks = true;
1723                 else
1724                         stack_mask = 0;
1725                 reg->precise = true;
1726                 break;
1727         }
1728
1729         if (!new_marks)
1730                 return 0;
1731         if (!reg_mask && !stack_mask)
1732                 return 0;
1733         for (;;) {
1734                 DECLARE_BITMAP(mask, 64);
1735                 u32 history = st->jmp_history_cnt;
1736
1737                 if (env->log.level & BPF_LOG_LEVEL)
1738                         verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
1739                 for (i = last_idx;;) {
1740                         if (skip_first) {
1741                                 err = 0;
1742                                 skip_first = false;
1743                         } else {
1744                                 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
1745                         }
1746                         if (err == -ENOTSUPP) {
1747                                 mark_all_scalars_precise(env, st);
1748                                 return 0;
1749                         } else if (err) {
1750                                 return err;
1751                         }
1752                         if (!reg_mask && !stack_mask)
1753                                 /* Found assignment(s) into tracked register in this state.
1754                                  * Since this state is already marked, just return.
1755                                  * Nothing to be tracked further in the parent state.
1756                                  */
1757                                 return 0;
1758                         if (i == first_idx)
1759                                 break;
1760                         i = get_prev_insn_idx(st, i, &history);
1761                         if (i >= env->prog->len) {
1762                                 /* This can happen if backtracking reached insn 0
1763                                  * and there are still reg_mask or stack_mask
1764                                  * to backtrack.
1765                                  * It means the backtracking missed the spot where
1766                                  * particular register was initialized with a constant.
1767                                  */
1768                                 verbose(env, "BUG backtracking idx %d\n", i);
1769                                 WARN_ONCE(1, "verifier backtracking bug");
1770                                 return -EFAULT;
1771                         }
1772                 }
1773                 st = st->parent;
1774                 if (!st)
1775                         break;
1776
1777                 new_marks = false;
1778                 func = st->frame[st->curframe];
1779                 bitmap_from_u64(mask, reg_mask);
1780                 for_each_set_bit(i, mask, 32) {
1781                         reg = &func->regs[i];
1782                         if (reg->type != SCALAR_VALUE) {
1783                                 reg_mask &= ~(1u << i);
1784                                 continue;
1785                         }
1786                         if (!reg->precise)
1787                                 new_marks = true;
1788                         reg->precise = true;
1789                 }
1790
1791                 bitmap_from_u64(mask, stack_mask);
1792                 for_each_set_bit(i, mask, 64) {
1793                         if (i >= func->allocated_stack / BPF_REG_SIZE) {
1794                                 /* the sequence of instructions:
1795                                  * 2: (bf) r3 = r10
1796                                  * 3: (7b) *(u64 *)(r3 -8) = r0
1797                                  * 4: (79) r4 = *(u64 *)(r10 -8)
1798                                  * doesn't contain jmps. It's backtracked
1799                                  * as a single block.
1800                                  * During backtracking insn 3 is not recognized as
1801                                  * stack access, so at the end of backtracking
1802                                  * stack slot fp-8 is still marked in stack_mask.
1803                                  * However the parent state may not have accessed
1804                                  * fp-8 and it's "unallocated" stack space.
1805                                  * In such case fallback to conservative.
1806                                  */
1807                                 mark_all_scalars_precise(env, st);
1808                                 return 0;
1809                         }
1810
1811                         if (func->stack[i].slot_type[0] != STACK_SPILL) {
1812                                 stack_mask &= ~(1ull << i);
1813                                 continue;
1814                         }
1815                         reg = &func->stack[i].spilled_ptr;
1816                         if (reg->type != SCALAR_VALUE) {
1817                                 stack_mask &= ~(1ull << i);
1818                                 continue;
1819                         }
1820                         if (!reg->precise)
1821                                 new_marks = true;
1822                         reg->precise = true;
1823                 }
1824                 if (env->log.level & BPF_LOG_LEVEL) {
1825                         print_verifier_state(env, func);
1826                         verbose(env, "parent %s regs=%x stack=%llx marks\n",
1827                                 new_marks ? "didn't have" : "already had",
1828                                 reg_mask, stack_mask);
1829                 }
1830
1831                 if (!reg_mask && !stack_mask)
1832                         break;
1833                 if (!new_marks)
1834                         break;
1835
1836                 last_idx = st->last_insn_idx;
1837                 first_idx = st->first_insn_idx;
1838         }
1839         return 0;
1840 }
1841
1842 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
1843 {
1844         return __mark_chain_precision(env, regno, -1);
1845 }
1846
1847 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
1848 {
1849         return __mark_chain_precision(env, -1, spi);
1850 }
1851
1852 static bool is_spillable_regtype(enum bpf_reg_type type)
1853 {
1854         switch (type) {
1855         case PTR_TO_MAP_VALUE:
1856         case PTR_TO_MAP_VALUE_OR_NULL:
1857         case PTR_TO_STACK:
1858         case PTR_TO_CTX:
1859         case PTR_TO_PACKET:
1860         case PTR_TO_PACKET_META:
1861         case PTR_TO_PACKET_END:
1862         case PTR_TO_FLOW_KEYS:
1863         case CONST_PTR_TO_MAP:
1864         case PTR_TO_SOCKET:
1865         case PTR_TO_SOCKET_OR_NULL:
1866         case PTR_TO_SOCK_COMMON:
1867         case PTR_TO_SOCK_COMMON_OR_NULL:
1868         case PTR_TO_TCP_SOCK:
1869         case PTR_TO_TCP_SOCK_OR_NULL:
1870         case PTR_TO_XDP_SOCK:
1871                 return true;
1872         default:
1873                 return false;
1874         }
1875 }
1876
1877 /* Does this register contain a constant zero? */
1878 static bool register_is_null(struct bpf_reg_state *reg)
1879 {
1880         return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1881 }
1882
1883 static bool register_is_const(struct bpf_reg_state *reg)
1884 {
1885         return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
1886 }
1887
1888 static bool __is_pointer_value(bool allow_ptr_leaks,
1889                                const struct bpf_reg_state *reg)
1890 {
1891         if (allow_ptr_leaks)
1892                 return false;
1893
1894         return reg->type != SCALAR_VALUE;
1895 }
1896
1897 static void save_register_state(struct bpf_func_state *state,
1898                                 int spi, struct bpf_reg_state *reg)
1899 {
1900         int i;
1901
1902         state->stack[spi].spilled_ptr = *reg;
1903         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1904
1905         for (i = 0; i < BPF_REG_SIZE; i++)
1906                 state->stack[spi].slot_type[i] = STACK_SPILL;
1907 }
1908
1909 /* check_stack_read/write functions track spill/fill of registers,
1910  * stack boundary and alignment are checked in check_mem_access()
1911  */
1912 static int check_stack_write(struct bpf_verifier_env *env,
1913                              struct bpf_func_state *state, /* func where register points to */
1914                              int off, int size, int value_regno, int insn_idx)
1915 {
1916         struct bpf_func_state *cur; /* state of the current function */
1917         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
1918         u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
1919         struct bpf_reg_state *reg = NULL;
1920
1921         err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
1922                                  state->acquired_refs, true);
1923         if (err)
1924                 return err;
1925         /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1926          * so it's aligned access and [off, off + size) are within stack limits
1927          */
1928         if (!env->allow_ptr_leaks &&
1929             state->stack[spi].slot_type[0] == STACK_SPILL &&
1930             size != BPF_REG_SIZE) {
1931                 verbose(env, "attempt to corrupt spilled pointer on stack\n");
1932                 return -EACCES;
1933         }
1934
1935         cur = env->cur_state->frame[env->cur_state->curframe];
1936         if (value_regno >= 0)
1937                 reg = &cur->regs[value_regno];
1938         if (!env->allow_ptr_leaks) {
1939                 bool sanitize = reg && is_spillable_regtype(reg->type);
1940
1941                 for (i = 0; i < size; i++) {
1942                         u8 type = state->stack[spi].slot_type[i];
1943
1944                         if (type != STACK_MISC && type != STACK_ZERO) {
1945                                 sanitize = true;
1946                                 break;
1947                         }
1948                 }
1949
1950                 if (sanitize)
1951                         env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
1952         }
1953
1954         if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
1955             !register_is_null(reg) && env->allow_ptr_leaks) {
1956                 if (dst_reg != BPF_REG_FP) {
1957                         /* The backtracking logic can only recognize explicit
1958                          * stack slot address like [fp - 8]. Other spill of
1959                          * scalar via different register has to be conervative.
1960                          * Backtrack from here and mark all registers as precise
1961                          * that contributed into 'reg' being a constant.
1962                          */
1963                         err = mark_chain_precision(env, value_regno);
1964                         if (err)
1965                                 return err;
1966                 }
1967                 save_register_state(state, spi, reg);
1968         } else if (reg && is_spillable_regtype(reg->type)) {
1969                 /* register containing pointer is being spilled into stack */
1970                 if (size != BPF_REG_SIZE) {
1971                         verbose_linfo(env, insn_idx, "; ");
1972                         verbose(env, "invalid size of register spill\n");
1973                         return -EACCES;
1974                 }
1975                 if (state != cur && reg->type == PTR_TO_STACK) {
1976                         verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1977                         return -EINVAL;
1978                 }
1979                 save_register_state(state, spi, reg);
1980         } else {
1981                 u8 type = STACK_MISC;
1982
1983                 /* regular write of data into stack destroys any spilled ptr */
1984                 state->stack[spi].spilled_ptr.type = NOT_INIT;
1985                 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
1986                 if (state->stack[spi].slot_type[0] == STACK_SPILL)
1987                         for (i = 0; i < BPF_REG_SIZE; i++)
1988                                 state->stack[spi].slot_type[i] = STACK_MISC;
1989
1990                 /* only mark the slot as written if all 8 bytes were written
1991                  * otherwise read propagation may incorrectly stop too soon
1992                  * when stack slots are partially written.
1993                  * This heuristic means that read propagation will be
1994                  * conservative, since it will add reg_live_read marks
1995                  * to stack slots all the way to first state when programs
1996                  * writes+reads less than 8 bytes
1997                  */
1998                 if (size == BPF_REG_SIZE)
1999                         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2000
2001                 /* when we zero initialize stack slots mark them as such */
2002                 if (reg && register_is_null(reg)) {
2003                         /* backtracking doesn't work for STACK_ZERO yet. */
2004                         err = mark_chain_precision(env, value_regno);
2005                         if (err)
2006                                 return err;
2007                         type = STACK_ZERO;
2008                 }
2009
2010                 /* Mark slots affected by this stack write. */
2011                 for (i = 0; i < size; i++)
2012                         state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2013                                 type;
2014         }
2015         return 0;
2016 }
2017
2018 static int check_stack_read(struct bpf_verifier_env *env,
2019                             struct bpf_func_state *reg_state /* func where register points to */,
2020                             int off, int size, int value_regno)
2021 {
2022         struct bpf_verifier_state *vstate = env->cur_state;
2023         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2024         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2025         struct bpf_reg_state *reg;
2026         u8 *stype;
2027
2028         if (reg_state->allocated_stack <= slot) {
2029                 verbose(env, "invalid read from stack off %d+0 size %d\n",
2030                         off, size);
2031                 return -EACCES;
2032         }
2033         stype = reg_state->stack[spi].slot_type;
2034         reg = &reg_state->stack[spi].spilled_ptr;
2035
2036         if (stype[0] == STACK_SPILL) {
2037                 if (size != BPF_REG_SIZE) {
2038                         if (reg->type != SCALAR_VALUE) {
2039                                 verbose_linfo(env, env->insn_idx, "; ");
2040                                 verbose(env, "invalid size of register fill\n");
2041                                 return -EACCES;
2042                         }
2043                         if (value_regno >= 0) {
2044                                 mark_reg_unknown(env, state->regs, value_regno);
2045                                 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2046                         }
2047                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2048                         return 0;
2049                 }
2050                 for (i = 1; i < BPF_REG_SIZE; i++) {
2051                         if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2052                                 verbose(env, "corrupted spill memory\n");
2053                                 return -EACCES;
2054                         }
2055                 }
2056
2057                 if (value_regno >= 0) {
2058                         /* restore register state from stack */
2059                         state->regs[value_regno] = *reg;
2060                         /* mark reg as written since spilled pointer state likely
2061                          * has its liveness marks cleared by is_state_visited()
2062                          * which resets stack/reg liveness for state transitions
2063                          */
2064                         state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2065                 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2066                         /* If value_regno==-1, the caller is asking us whether
2067                          * it is acceptable to use this value as a SCALAR_VALUE
2068                          * (e.g. for XADD).
2069                          * We must not allow unprivileged callers to do that
2070                          * with spilled pointers.
2071                          */
2072                         verbose(env, "leaking pointer from stack off %d\n",
2073                                 off);
2074                         return -EACCES;
2075                 }
2076                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2077         } else {
2078                 int zeros = 0;
2079
2080                 for (i = 0; i < size; i++) {
2081                         if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
2082                                 continue;
2083                         if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
2084                                 zeros++;
2085                                 continue;
2086                         }
2087                         verbose(env, "invalid read from stack off %d+%d size %d\n",
2088                                 off, i, size);
2089                         return -EACCES;
2090                 }
2091                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2092                 if (value_regno >= 0) {
2093                         if (zeros == size) {
2094                                 /* any size read into register is zero extended,
2095                                  * so the whole register == const_zero
2096                                  */
2097                                 __mark_reg_const_zero(&state->regs[value_regno]);
2098                                 /* backtracking doesn't support STACK_ZERO yet,
2099                                  * so mark it precise here, so that later
2100                                  * backtracking can stop here.
2101                                  * Backtracking may not need this if this register
2102                                  * doesn't participate in pointer adjustment.
2103                                  * Forward propagation of precise flag is not
2104                                  * necessary either. This mark is only to stop
2105                                  * backtracking. Any register that contributed
2106                                  * to const 0 was marked precise before spill.
2107                                  */
2108                                 state->regs[value_regno].precise = true;
2109                         } else {
2110                                 /* have read misc data from the stack */
2111                                 mark_reg_unknown(env, state->regs, value_regno);
2112                         }
2113                         state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2114                 }
2115         }
2116         return 0;
2117 }
2118
2119 static int check_stack_access(struct bpf_verifier_env *env,
2120                               const struct bpf_reg_state *reg,
2121                               int off, int size)
2122 {
2123         /* Stack accesses must be at a fixed offset, so that we
2124          * can determine what type of data were returned. See
2125          * check_stack_read().
2126          */
2127         if (!tnum_is_const(reg->var_off)) {
2128                 char tn_buf[48];
2129
2130                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2131                 verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
2132                         tn_buf, off, size);
2133                 return -EACCES;
2134         }
2135
2136         if (off >= 0 || off < -MAX_BPF_STACK) {
2137                 verbose(env, "invalid stack off=%d size=%d\n", off, size);
2138                 return -EACCES;
2139         }
2140
2141         return 0;
2142 }
2143
2144 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
2145                                  int off, int size, enum bpf_access_type type)
2146 {
2147         struct bpf_reg_state *regs = cur_regs(env);
2148         struct bpf_map *map = regs[regno].map_ptr;
2149         u32 cap = bpf_map_flags_to_cap(map);
2150
2151         if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2152                 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
2153                         map->value_size, off, size);
2154                 return -EACCES;
2155         }
2156
2157         if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2158                 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
2159                         map->value_size, off, size);
2160                 return -EACCES;
2161         }
2162
2163         return 0;
2164 }
2165
2166 /* check read/write into map element returned by bpf_map_lookup_elem() */
2167 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
2168                               int size, bool zero_size_allowed)
2169 {
2170         struct bpf_reg_state *regs = cur_regs(env);
2171         struct bpf_map *map = regs[regno].map_ptr;
2172
2173         if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
2174             off + size > map->value_size) {
2175                 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
2176                         map->value_size, off, size);
2177                 return -EACCES;
2178         }
2179         return 0;
2180 }
2181
2182 /* check read/write into a map element with possible variable offset */
2183 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
2184                             int off, int size, bool zero_size_allowed)
2185 {
2186         struct bpf_verifier_state *vstate = env->cur_state;
2187         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2188         struct bpf_reg_state *reg = &state->regs[regno];
2189         int err;
2190
2191         /* We may have adjusted the register to this map value, so we
2192          * need to try adding each of min_value and max_value to off
2193          * to make sure our theoretical access will be safe.
2194          */
2195         if (env->log.level & BPF_LOG_LEVEL)
2196                 print_verifier_state(env, state);
2197
2198         /* The minimum value is only important with signed
2199          * comparisons where we can't assume the floor of a
2200          * value is 0.  If we are using signed variables for our
2201          * index'es we need to make sure that whatever we use
2202          * will have a set floor within our range.
2203          */
2204         if (reg->smin_value < 0 &&
2205             (reg->smin_value == S64_MIN ||
2206              (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2207               reg->smin_value + off < 0)) {
2208                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2209                         regno);
2210                 return -EACCES;
2211         }
2212         err = __check_map_access(env, regno, reg->smin_value + off, size,
2213                                  zero_size_allowed);
2214         if (err) {
2215                 verbose(env, "R%d min value is outside of the array range\n",
2216                         regno);
2217                 return err;
2218         }
2219
2220         /* If we haven't set a max value then we need to bail since we can't be
2221          * sure we won't do bad things.
2222          * If reg->umax_value + off could overflow, treat that as unbounded too.
2223          */
2224         if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2225                 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
2226                         regno);
2227                 return -EACCES;
2228         }
2229         err = __check_map_access(env, regno, reg->umax_value + off, size,
2230                                  zero_size_allowed);
2231         if (err)
2232                 verbose(env, "R%d max value is outside of the array range\n",
2233                         regno);
2234
2235         if (map_value_has_spin_lock(reg->map_ptr)) {
2236                 u32 lock = reg->map_ptr->spin_lock_off;
2237
2238                 /* if any part of struct bpf_spin_lock can be touched by
2239                  * load/store reject this program.
2240                  * To check that [x1, x2) overlaps with [y1, y2)
2241                  * it is sufficient to check x1 < y2 && y1 < x2.
2242                  */
2243                 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
2244                      lock < reg->umax_value + off + size) {
2245                         verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2246                         return -EACCES;
2247                 }
2248         }
2249         return err;
2250 }
2251
2252 #define MAX_PACKET_OFF 0xffff
2253
2254 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
2255                                        const struct bpf_call_arg_meta *meta,
2256                                        enum bpf_access_type t)
2257 {
2258         switch (env->prog->type) {
2259         /* Program types only with direct read access go here! */
2260         case BPF_PROG_TYPE_LWT_IN:
2261         case BPF_PROG_TYPE_LWT_OUT:
2262         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2263         case BPF_PROG_TYPE_SK_REUSEPORT:
2264         case BPF_PROG_TYPE_FLOW_DISSECTOR:
2265         case BPF_PROG_TYPE_CGROUP_SKB:
2266                 if (t == BPF_WRITE)
2267                         return false;
2268                 /* fallthrough */
2269
2270         /* Program types with direct read + write access go here! */
2271         case BPF_PROG_TYPE_SCHED_CLS:
2272         case BPF_PROG_TYPE_SCHED_ACT:
2273         case BPF_PROG_TYPE_XDP:
2274         case BPF_PROG_TYPE_LWT_XMIT:
2275         case BPF_PROG_TYPE_SK_SKB:
2276         case BPF_PROG_TYPE_SK_MSG:
2277                 if (meta)
2278                         return meta->pkt_access;
2279
2280                 env->seen_direct_write = true;
2281                 return true;
2282
2283         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2284                 if (t == BPF_WRITE)
2285                         env->seen_direct_write = true;
2286
2287                 return true;
2288
2289         default:
2290                 return false;
2291         }
2292 }
2293
2294 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
2295                                  int off, int size, bool zero_size_allowed)
2296 {
2297         struct bpf_reg_state *regs = cur_regs(env);
2298         struct bpf_reg_state *reg = &regs[regno];
2299
2300         if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
2301             (u64)off + size > reg->range) {
2302                 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
2303                         off, size, regno, reg->id, reg->off, reg->range);
2304                 return -EACCES;
2305         }
2306         return 0;
2307 }
2308
2309 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
2310                                int size, bool zero_size_allowed)
2311 {
2312         struct bpf_reg_state *regs = cur_regs(env);
2313         struct bpf_reg_state *reg = &regs[regno];
2314         int err;
2315
2316         /* We may have added a variable offset to the packet pointer; but any
2317          * reg->range we have comes after that.  We are only checking the fixed
2318          * offset.
2319          */
2320
2321         /* We don't allow negative numbers, because we aren't tracking enough
2322          * detail to prove they're safe.
2323          */
2324         if (reg->smin_value < 0) {
2325                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2326                         regno);
2327                 return -EACCES;
2328         }
2329         err = __check_packet_access(env, regno, off, size, zero_size_allowed);
2330         if (err) {
2331                 verbose(env, "R%d offset is outside of the packet\n", regno);
2332                 return err;
2333         }
2334
2335         /* __check_packet_access has made sure "off + size - 1" is within u16.
2336          * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
2337          * otherwise find_good_pkt_pointers would have refused to set range info
2338          * that __check_packet_access would have rejected this pkt access.
2339          * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2340          */
2341         env->prog->aux->max_pkt_offset =
2342                 max_t(u32, env->prog->aux->max_pkt_offset,
2343                       off + reg->umax_value + size - 1);
2344
2345         return err;
2346 }
2347
2348 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
2349 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
2350                             enum bpf_access_type t, enum bpf_reg_type *reg_type)
2351 {
2352         struct bpf_insn_access_aux info = {
2353                 .reg_type = *reg_type,
2354         };
2355
2356         if (env->ops->is_valid_access &&
2357             env->ops->is_valid_access(off, size, t, env->prog, &info)) {
2358                 /* A non zero info.ctx_field_size indicates that this field is a
2359                  * candidate for later verifier transformation to load the whole
2360                  * field and then apply a mask when accessed with a narrower
2361                  * access than actual ctx access size. A zero info.ctx_field_size
2362                  * will only allow for whole field access and rejects any other
2363                  * type of narrower access.
2364                  */
2365                 *reg_type = info.reg_type;
2366
2367                 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
2368                 /* remember the offset of last byte accessed in ctx */
2369                 if (env->prog->aux->max_ctx_offset < off + size)
2370                         env->prog->aux->max_ctx_offset = off + size;
2371                 return 0;
2372         }
2373
2374         verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
2375         return -EACCES;
2376 }
2377
2378 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
2379                                   int size)
2380 {
2381         if (size < 0 || off < 0 ||
2382             (u64)off + size > sizeof(struct bpf_flow_keys)) {
2383                 verbose(env, "invalid access to flow keys off=%d size=%d\n",
2384                         off, size);
2385                 return -EACCES;
2386         }
2387         return 0;
2388 }
2389
2390 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
2391                              u32 regno, int off, int size,
2392                              enum bpf_access_type t)
2393 {
2394         struct bpf_reg_state *regs = cur_regs(env);
2395         struct bpf_reg_state *reg = &regs[regno];
2396         struct bpf_insn_access_aux info = {};
2397         bool valid;
2398
2399         if (reg->smin_value < 0) {
2400                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2401                         regno);
2402                 return -EACCES;
2403         }
2404
2405         switch (reg->type) {
2406         case PTR_TO_SOCK_COMMON:
2407                 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
2408                 break;
2409         case PTR_TO_SOCKET:
2410                 valid = bpf_sock_is_valid_access(off, size, t, &info);
2411                 break;
2412         case PTR_TO_TCP_SOCK:
2413                 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
2414                 break;
2415         case PTR_TO_XDP_SOCK:
2416                 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
2417                 break;
2418         default:
2419                 valid = false;
2420         }
2421
2422
2423         if (valid) {
2424                 env->insn_aux_data[insn_idx].ctx_field_size =
2425                         info.ctx_field_size;
2426                 return 0;
2427         }
2428
2429         verbose(env, "R%d invalid %s access off=%d size=%d\n",
2430                 regno, reg_type_str[reg->type], off, size);
2431
2432         return -EACCES;
2433 }
2434
2435 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2436 {
2437         return cur_regs(env) + regno;
2438 }
2439
2440 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
2441 {
2442         return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
2443 }
2444
2445 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
2446 {
2447         const struct bpf_reg_state *reg = reg_state(env, regno);
2448
2449         return reg->type == PTR_TO_CTX;
2450 }
2451
2452 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
2453 {
2454         const struct bpf_reg_state *reg = reg_state(env, regno);
2455
2456         return type_is_sk_pointer(reg->type);
2457 }
2458
2459 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
2460 {
2461         const struct bpf_reg_state *reg = reg_state(env, regno);
2462
2463         return type_is_pkt_pointer(reg->type);
2464 }
2465
2466 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
2467 {
2468         const struct bpf_reg_state *reg = reg_state(env, regno);
2469
2470         /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
2471         return reg->type == PTR_TO_FLOW_KEYS;
2472 }
2473
2474 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
2475                                    const struct bpf_reg_state *reg,
2476                                    int off, int size, bool strict)
2477 {
2478         struct tnum reg_off;
2479         int ip_align;
2480
2481         /* Byte size accesses are always allowed. */
2482         if (!strict || size == 1)
2483                 return 0;
2484
2485         /* For platforms that do not have a Kconfig enabling
2486          * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
2487          * NET_IP_ALIGN is universally set to '2'.  And on platforms
2488          * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
2489          * to this code only in strict mode where we want to emulate
2490          * the NET_IP_ALIGN==2 checking.  Therefore use an
2491          * unconditional IP align value of '2'.
2492          */
2493         ip_align = 2;
2494
2495         reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
2496         if (!tnum_is_aligned(reg_off, size)) {
2497                 char tn_buf[48];
2498
2499                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2500                 verbose(env,
2501                         "misaligned packet access off %d+%s+%d+%d size %d\n",
2502                         ip_align, tn_buf, reg->off, off, size);
2503                 return -EACCES;
2504         }
2505
2506         return 0;
2507 }
2508
2509 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
2510                                        const struct bpf_reg_state *reg,
2511                                        const char *pointer_desc,
2512                                        int off, int size, bool strict)
2513 {
2514         struct tnum reg_off;
2515
2516         /* Byte size accesses are always allowed. */
2517         if (!strict || size == 1)
2518                 return 0;
2519
2520         reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
2521         if (!tnum_is_aligned(reg_off, size)) {
2522                 char tn_buf[48];
2523
2524                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2525                 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
2526                         pointer_desc, tn_buf, reg->off, off, size);
2527                 return -EACCES;
2528         }
2529
2530         return 0;
2531 }
2532
2533 static int check_ptr_alignment(struct bpf_verifier_env *env,
2534                                const struct bpf_reg_state *reg, int off,
2535                                int size, bool strict_alignment_once)
2536 {
2537         bool strict = env->strict_alignment || strict_alignment_once;
2538         const char *pointer_desc = "";
2539
2540         switch (reg->type) {
2541         case PTR_TO_PACKET:
2542         case PTR_TO_PACKET_META:
2543                 /* Special case, because of NET_IP_ALIGN. Given metadata sits
2544                  * right in front, treat it the very same way.
2545                  */
2546                 return check_pkt_ptr_alignment(env, reg, off, size, strict);
2547         case PTR_TO_FLOW_KEYS:
2548                 pointer_desc = "flow keys ";
2549                 break;
2550         case PTR_TO_MAP_VALUE:
2551                 pointer_desc = "value ";
2552                 break;
2553         case PTR_TO_CTX:
2554                 pointer_desc = "context ";
2555                 break;
2556         case PTR_TO_STACK:
2557                 pointer_desc = "stack ";
2558                 /* The stack spill tracking logic in check_stack_write()
2559                  * and check_stack_read() relies on stack accesses being
2560                  * aligned.
2561                  */
2562                 strict = true;
2563                 break;
2564         case PTR_TO_SOCKET:
2565                 pointer_desc = "sock ";
2566                 break;
2567         case PTR_TO_SOCK_COMMON:
2568                 pointer_desc = "sock_common ";
2569                 break;
2570         case PTR_TO_TCP_SOCK:
2571                 pointer_desc = "tcp_sock ";
2572                 break;
2573         case PTR_TO_XDP_SOCK:
2574                 pointer_desc = "xdp_sock ";
2575                 break;
2576         default:
2577                 break;
2578         }
2579         return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
2580                                            strict);
2581 }
2582
2583 static int update_stack_depth(struct bpf_verifier_env *env,
2584                               const struct bpf_func_state *func,
2585                               int off)
2586 {
2587         u16 stack = env->subprog_info[func->subprogno].stack_depth;
2588
2589         if (stack >= -off)
2590                 return 0;
2591
2592         /* update known max for given subprogram */
2593         env->subprog_info[func->subprogno].stack_depth = -off;
2594         return 0;
2595 }
2596
2597 /* starting from main bpf function walk all instructions of the function
2598  * and recursively walk all callees that given function can call.
2599  * Ignore jump and exit insns.
2600  * Since recursion is prevented by check_cfg() this algorithm
2601  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
2602  */
2603 static int check_max_stack_depth(struct bpf_verifier_env *env)
2604 {
2605         int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
2606         struct bpf_subprog_info *subprog = env->subprog_info;
2607         struct bpf_insn *insn = env->prog->insnsi;
2608         int ret_insn[MAX_CALL_FRAMES];
2609         int ret_prog[MAX_CALL_FRAMES];
2610
2611 process_func:
2612         /* protect against potential stack overflow that might happen when
2613          * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
2614          * depth for such case down to 256 so that the worst case scenario
2615          * would result in 8k stack size (32 which is tailcall limit * 256 =
2616          * 8k).
2617          *
2618          * To get the idea what might happen, see an example:
2619          * func1 -> sub rsp, 128
2620          *  subfunc1 -> sub rsp, 256
2621          *  tailcall1 -> add rsp, 256
2622          *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
2623          *   subfunc2 -> sub rsp, 64
2624          *   subfunc22 -> sub rsp, 128
2625          *   tailcall2 -> add rsp, 128
2626          *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
2627          *
2628          * tailcall will unwind the current stack frame but it will not get rid
2629          * of caller's stack as shown on the example above.
2630          */
2631         if (idx && subprog[idx].has_tail_call && depth >= 256) {
2632                 verbose(env,
2633                         "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
2634                         depth);
2635                 return -EACCES;
2636         }
2637         /* round up to 32-bytes, since this is granularity
2638          * of interpreter stack size
2639          */
2640         depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
2641         if (depth > MAX_BPF_STACK) {
2642                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
2643                         frame + 1, depth);
2644                 return -EACCES;
2645         }
2646 continue_func:
2647         subprog_end = subprog[idx + 1].start;
2648         for (; i < subprog_end; i++) {
2649                 if (insn[i].code != (BPF_JMP | BPF_CALL))
2650                         continue;
2651                 if (insn[i].src_reg != BPF_PSEUDO_CALL)
2652                         continue;
2653                 /* remember insn and function to return to */
2654                 ret_insn[frame] = i + 1;
2655                 ret_prog[frame] = idx;
2656
2657                 /* find the callee */
2658                 i = i + insn[i].imm + 1;
2659                 idx = find_subprog(env, i);
2660                 if (idx < 0) {
2661                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
2662                                   i);
2663                         return -EFAULT;
2664                 }
2665                 frame++;
2666                 if (frame >= MAX_CALL_FRAMES) {
2667                         verbose(env, "the call stack of %d frames is too deep !\n",
2668                                 frame);
2669                         return -E2BIG;
2670                 }
2671                 goto process_func;
2672         }
2673         /* end of for() loop means the last insn of the 'subprog'
2674          * was reached. Doesn't matter whether it was JA or EXIT
2675          */
2676         if (frame == 0)
2677                 return 0;
2678         depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
2679         frame--;
2680         i = ret_insn[frame];
2681         idx = ret_prog[frame];
2682         goto continue_func;
2683 }
2684
2685 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
2686 static int get_callee_stack_depth(struct bpf_verifier_env *env,
2687                                   const struct bpf_insn *insn, int idx)
2688 {
2689         int start = idx + insn->imm + 1, subprog;
2690
2691         subprog = find_subprog(env, start);
2692         if (subprog < 0) {
2693                 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
2694                           start);
2695                 return -EFAULT;
2696         }
2697         return env->subprog_info[subprog].stack_depth;
2698 }
2699 #endif
2700
2701 static int check_ctx_reg(struct bpf_verifier_env *env,
2702                          const struct bpf_reg_state *reg, int regno)
2703 {
2704         /* Access to ctx or passing it to a helper is only allowed in
2705          * its original, unmodified form.
2706          */
2707
2708         if (reg->off) {
2709                 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
2710                         regno, reg->off);
2711                 return -EACCES;
2712         }
2713
2714         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
2715                 char tn_buf[48];
2716
2717                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2718                 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
2719                 return -EACCES;
2720         }
2721
2722         return 0;
2723 }
2724
2725 static int check_tp_buffer_access(struct bpf_verifier_env *env,
2726                                   const struct bpf_reg_state *reg,
2727                                   int regno, int off, int size)
2728 {
2729         if (off < 0) {
2730                 verbose(env,
2731                         "R%d invalid tracepoint buffer access: off=%d, size=%d",
2732                         regno, off, size);
2733                 return -EACCES;
2734         }
2735         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
2736                 char tn_buf[48];
2737
2738                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2739                 verbose(env,
2740                         "R%d invalid variable buffer offset: off=%d, var_off=%s",
2741                         regno, off, tn_buf);
2742                 return -EACCES;
2743         }
2744         if (off + size > env->prog->aux->max_tp_access)
2745                 env->prog->aux->max_tp_access = off + size;
2746
2747         return 0;
2748 }
2749
2750
2751 /* truncate register to smaller size (in bytes)
2752  * must be called with size < BPF_REG_SIZE
2753  */
2754 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
2755 {
2756         u64 mask;
2757
2758         /* clear high bits in bit representation */
2759         reg->var_off = tnum_cast(reg->var_off, size);
2760
2761         /* fix arithmetic bounds */
2762         mask = ((u64)1 << (size * 8)) - 1;
2763         if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
2764                 reg->umin_value &= mask;
2765                 reg->umax_value &= mask;
2766         } else {
2767                 reg->umin_value = 0;
2768                 reg->umax_value = mask;
2769         }
2770         reg->smin_value = reg->umin_value;
2771         reg->smax_value = reg->umax_value;
2772 }
2773
2774 static bool bpf_map_is_rdonly(const struct bpf_map *map)
2775 {
2776         return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
2777 }
2778
2779 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
2780 {
2781         void *ptr;
2782         u64 addr;
2783         int err;
2784
2785         err = map->ops->map_direct_value_addr(map, &addr, off);
2786         if (err)
2787                 return err;
2788         ptr = (void *)(long)addr + off;
2789
2790         switch (size) {
2791         case sizeof(u8):
2792                 *val = (u64)*(u8 *)ptr;
2793                 break;
2794         case sizeof(u16):
2795                 *val = (u64)*(u16 *)ptr;
2796                 break;
2797         case sizeof(u32):
2798                 *val = (u64)*(u32 *)ptr;
2799                 break;
2800         case sizeof(u64):
2801                 *val = *(u64 *)ptr;
2802                 break;
2803         default:
2804                 return -EINVAL;
2805         }
2806         return 0;
2807 }
2808
2809 /* check whether memory at (regno + off) is accessible for t = (read | write)
2810  * if t==write, value_regno is a register which value is stored into memory
2811  * if t==read, value_regno is a register which will receive the value from memory
2812  * if t==write && value_regno==-1, some unknown value is stored into memory
2813  * if t==read && value_regno==-1, don't care what we read from memory
2814  */
2815 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
2816                             int off, int bpf_size, enum bpf_access_type t,
2817                             int value_regno, bool strict_alignment_once)
2818 {
2819         struct bpf_reg_state *regs = cur_regs(env);
2820         struct bpf_reg_state *reg = regs + regno;
2821         struct bpf_func_state *state;
2822         int size, err = 0;
2823
2824         size = bpf_size_to_bytes(bpf_size);
2825         if (size < 0)
2826                 return size;
2827
2828         /* alignment checks will add in reg->off themselves */
2829         err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
2830         if (err)
2831                 return err;
2832
2833         /* for access checks, reg->off is just part of off */
2834         off += reg->off;
2835
2836         if (reg->type == PTR_TO_MAP_VALUE) {
2837                 if (t == BPF_WRITE && value_regno >= 0 &&
2838                     is_pointer_value(env, value_regno)) {
2839                         verbose(env, "R%d leaks addr into map\n", value_regno);
2840                         return -EACCES;
2841                 }
2842                 err = check_map_access_type(env, regno, off, size, t);
2843                 if (err)
2844                         return err;
2845                 err = check_map_access(env, regno, off, size, false);
2846                 if (!err && t == BPF_READ && value_regno >= 0) {
2847                         struct bpf_map *map = reg->map_ptr;
2848
2849                         /* if map is read-only, track its contents as scalars */
2850                         if (tnum_is_const(reg->var_off) &&
2851                             bpf_map_is_rdonly(map) &&
2852                             map->ops->map_direct_value_addr) {
2853                                 int map_off = off + reg->var_off.value;
2854                                 u64 val = 0;
2855
2856                                 err = bpf_map_direct_read(map, map_off, size,
2857                                                           &val);
2858                                 if (err)
2859                                         return err;
2860
2861                                 regs[value_regno].type = SCALAR_VALUE;
2862                                 __mark_reg_known(&regs[value_regno], val);
2863                         } else {
2864                                 mark_reg_unknown(env, regs, value_regno);
2865                         }
2866                 }
2867         } else if (reg->type == PTR_TO_CTX) {
2868                 enum bpf_reg_type reg_type = SCALAR_VALUE;
2869
2870                 if (t == BPF_WRITE && value_regno >= 0 &&
2871                     is_pointer_value(env, value_regno)) {
2872                         verbose(env, "R%d leaks addr into ctx\n", value_regno);
2873                         return -EACCES;
2874                 }
2875
2876                 err = check_ctx_reg(env, reg, regno);
2877                 if (err < 0)
2878                         return err;
2879
2880                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
2881                 if (!err && t == BPF_READ && value_regno >= 0) {
2882                         /* ctx access returns either a scalar, or a
2883                          * PTR_TO_PACKET[_META,_END]. In the latter
2884                          * case, we know the offset is zero.
2885                          */
2886                         if (reg_type == SCALAR_VALUE) {
2887                                 mark_reg_unknown(env, regs, value_regno);
2888                         } else {
2889                                 mark_reg_known_zero(env, regs,
2890                                                     value_regno);
2891                                 if (reg_type_may_be_null(reg_type))
2892                                         regs[value_regno].id = ++env->id_gen;
2893                                 /* A load of ctx field could have different
2894                                  * actual load size with the one encoded in the
2895                                  * insn. When the dst is PTR, it is for sure not
2896                                  * a sub-register.
2897                                  */
2898                                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
2899                         }
2900                         regs[value_regno].type = reg_type;
2901                 }
2902
2903         } else if (reg->type == PTR_TO_STACK) {
2904                 off += reg->var_off.value;
2905                 err = check_stack_access(env, reg, off, size);
2906                 if (err)
2907                         return err;
2908
2909                 state = func(env, reg);
2910                 err = update_stack_depth(env, state, off);
2911                 if (err)
2912                         return err;
2913
2914                 if (t == BPF_WRITE)
2915                         err = check_stack_write(env, state, off, size,
2916                                                 value_regno, insn_idx);
2917                 else
2918                         err = check_stack_read(env, state, off, size,
2919                                                value_regno);
2920         } else if (reg_is_pkt_pointer(reg)) {
2921                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
2922                         verbose(env, "cannot write into packet\n");
2923                         return -EACCES;
2924                 }
2925                 if (t == BPF_WRITE && value_regno >= 0 &&
2926                     is_pointer_value(env, value_regno)) {
2927                         verbose(env, "R%d leaks addr into packet\n",
2928                                 value_regno);
2929                         return -EACCES;
2930                 }
2931                 err = check_packet_access(env, regno, off, size, false);
2932                 if (!err && t == BPF_READ && value_regno >= 0)
2933                         mark_reg_unknown(env, regs, value_regno);
2934         } else if (reg->type == PTR_TO_FLOW_KEYS) {
2935                 if (t == BPF_WRITE && value_regno >= 0 &&
2936                     is_pointer_value(env, value_regno)) {
2937                         verbose(env, "R%d leaks addr into flow keys\n",
2938                                 value_regno);
2939                         return -EACCES;
2940                 }
2941
2942                 err = check_flow_keys_access(env, off, size);
2943                 if (!err && t == BPF_READ && value_regno >= 0)
2944                         mark_reg_unknown(env, regs, value_regno);
2945         } else if (type_is_sk_pointer(reg->type)) {
2946                 if (t == BPF_WRITE) {
2947                         verbose(env, "R%d cannot write into %s\n",
2948                                 regno, reg_type_str[reg->type]);
2949                         return -EACCES;
2950                 }
2951                 err = check_sock_access(env, insn_idx, regno, off, size, t);
2952                 if (!err && value_regno >= 0)
2953                         mark_reg_unknown(env, regs, value_regno);
2954         } else if (reg->type == PTR_TO_TP_BUFFER) {
2955                 err = check_tp_buffer_access(env, reg, regno, off, size);
2956                 if (!err && t == BPF_READ && value_regno >= 0)
2957                         mark_reg_unknown(env, regs, value_regno);
2958         } else {
2959                 verbose(env, "R%d invalid mem access '%s'\n", regno,
2960                         reg_type_str[reg->type]);
2961                 return -EACCES;
2962         }
2963
2964         if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
2965             regs[value_regno].type == SCALAR_VALUE) {
2966                 /* b/h/w load zero-extends, mark upper bits as known 0 */
2967                 coerce_reg_to_size(&regs[value_regno], size);
2968         }
2969         return err;
2970 }
2971
2972 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
2973 {
2974         int err;
2975
2976         if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
2977             insn->imm != 0) {
2978                 verbose(env, "BPF_XADD uses reserved fields\n");
2979                 return -EINVAL;
2980         }
2981
2982         /* check src1 operand */
2983         err = check_reg_arg(env, insn->src_reg, SRC_OP);
2984         if (err)
2985                 return err;
2986
2987         /* check src2 operand */
2988         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2989         if (err)
2990                 return err;
2991
2992         if (is_pointer_value(env, insn->src_reg)) {
2993                 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
2994                 return -EACCES;
2995         }
2996
2997         if (is_ctx_reg(env, insn->dst_reg) ||
2998             is_pkt_reg(env, insn->dst_reg) ||
2999             is_flow_key_reg(env, insn->dst_reg) ||
3000             is_sk_reg(env, insn->dst_reg)) {
3001                 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
3002                         insn->dst_reg,
3003                         reg_type_str[reg_state(env, insn->dst_reg)->type]);
3004                 return -EACCES;
3005         }
3006
3007         /* check whether atomic_add can read the memory */
3008         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3009                                BPF_SIZE(insn->code), BPF_READ, -1, true);
3010         if (err)
3011                 return err;
3012
3013         /* check whether atomic_add can write into the same memory */
3014         return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3015                                 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
3016 }
3017
3018 static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
3019                                   int off, int access_size,
3020                                   bool zero_size_allowed)
3021 {
3022         struct bpf_reg_state *reg = reg_state(env, regno);
3023
3024         if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
3025             access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
3026                 if (tnum_is_const(reg->var_off)) {
3027                         verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
3028                                 regno, off, access_size);
3029                 } else {
3030                         char tn_buf[48];
3031
3032                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3033                         verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
3034                                 regno, tn_buf, access_size);
3035                 }
3036                 return -EACCES;
3037         }
3038         return 0;
3039 }
3040
3041 /* when register 'regno' is passed into function that will read 'access_size'
3042  * bytes from that pointer, make sure that it's within stack boundary
3043  * and all elements of stack are initialized.
3044  * Unlike most pointer bounds-checking functions, this one doesn't take an
3045  * 'off' argument, so it has to add in reg->off itself.
3046  */
3047 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
3048                                 int access_size, bool zero_size_allowed,
3049                                 struct bpf_call_arg_meta *meta)
3050 {
3051         struct bpf_reg_state *reg = reg_state(env, regno);
3052         struct bpf_func_state *state = func(env, reg);
3053         int err, min_off, max_off, i, j, slot, spi;
3054
3055         if (reg->type != PTR_TO_STACK) {
3056                 /* Allow zero-byte read from NULL, regardless of pointer type */
3057                 if (zero_size_allowed && access_size == 0 &&
3058                     register_is_null(reg))
3059                         return 0;
3060
3061                 verbose(env, "R%d type=%s expected=%s\n", regno,
3062                         reg_type_str[reg->type],
3063                         reg_type_str[PTR_TO_STACK]);
3064                 return -EACCES;
3065         }
3066
3067         if (tnum_is_const(reg->var_off)) {
3068                 min_off = max_off = reg->var_off.value + reg->off;
3069                 err = __check_stack_boundary(env, regno, min_off, access_size,
3070                                              zero_size_allowed);
3071                 if (err)
3072                         return err;
3073         } else {
3074                 /* Variable offset is prohibited for unprivileged mode for
3075                  * simplicity since it requires corresponding support in
3076                  * Spectre masking for stack ALU.
3077                  * See also retrieve_ptr_limit().
3078                  */
3079                 if (!env->allow_ptr_leaks) {
3080                         char tn_buf[48];
3081
3082                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3083                         verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
3084                                 regno, tn_buf);
3085                         return -EACCES;
3086                 }
3087                 /* Only initialized buffer on stack is allowed to be accessed
3088                  * with variable offset. With uninitialized buffer it's hard to
3089                  * guarantee that whole memory is marked as initialized on
3090                  * helper return since specific bounds are unknown what may
3091                  * cause uninitialized stack leaking.
3092                  */
3093                 if (meta && meta->raw_mode)
3094                         meta = NULL;
3095
3096                 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
3097                     reg->smax_value <= -BPF_MAX_VAR_OFF) {
3098                         verbose(env, "R%d unbounded indirect variable offset stack access\n",
3099                                 regno);
3100                         return -EACCES;
3101                 }
3102                 min_off = reg->smin_value + reg->off;
3103                 max_off = reg->smax_value + reg->off;
3104                 err = __check_stack_boundary(env, regno, min_off, access_size,
3105                                              zero_size_allowed);
3106                 if (err) {
3107                         verbose(env, "R%d min value is outside of stack bound\n",
3108                                 regno);
3109                         return err;
3110                 }
3111                 err = __check_stack_boundary(env, regno, max_off, access_size,
3112                                              zero_size_allowed);
3113                 if (err) {
3114                         verbose(env, "R%d max value is outside of stack bound\n",
3115                                 regno);
3116                         return err;
3117                 }
3118         }
3119
3120         if (meta && meta->raw_mode) {
3121                 meta->access_size = access_size;
3122                 meta->regno = regno;
3123                 return 0;
3124         }
3125
3126         for (i = min_off; i < max_off + access_size; i++) {
3127                 u8 *stype;
3128
3129                 slot = -i - 1;
3130                 spi = slot / BPF_REG_SIZE;
3131                 if (state->allocated_stack <= slot)
3132                         goto err;
3133                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3134                 if (*stype == STACK_MISC)
3135                         goto mark;
3136                 if (*stype == STACK_ZERO) {
3137                         /* helper can write anything into the stack */
3138                         *stype = STACK_MISC;
3139                         goto mark;
3140                 }
3141                 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
3142                     state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
3143                         __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
3144                         for (j = 0; j < BPF_REG_SIZE; j++)
3145                                 state->stack[spi].slot_type[j] = STACK_MISC;
3146                         goto mark;
3147                 }
3148
3149 err:
3150                 if (tnum_is_const(reg->var_off)) {
3151                         verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
3152                                 min_off, i - min_off, access_size);
3153                 } else {
3154                         char tn_buf[48];
3155
3156                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3157                         verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
3158                                 tn_buf, i - min_off, access_size);
3159                 }
3160                 return -EACCES;
3161 mark:
3162                 /* reading any byte out of 8-byte 'spill_slot' will cause
3163                  * the whole slot to be marked as 'read'
3164                  */
3165                 mark_reg_read(env, &state->stack[spi].spilled_ptr,
3166                               state->stack[spi].spilled_ptr.parent,
3167                               REG_LIVE_READ64);
3168         }
3169         return update_stack_depth(env, state, min_off);
3170 }
3171
3172 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
3173                                    int access_size, bool zero_size_allowed,
3174                                    struct bpf_call_arg_meta *meta)
3175 {
3176         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3177
3178         switch (reg->type) {
3179         case PTR_TO_PACKET:
3180         case PTR_TO_PACKET_META:
3181                 return check_packet_access(env, regno, reg->off, access_size,
3182                                            zero_size_allowed);
3183         case PTR_TO_MAP_VALUE:
3184                 if (check_map_access_type(env, regno, reg->off, access_size,
3185                                           meta && meta->raw_mode ? BPF_WRITE :
3186                                           BPF_READ))
3187                         return -EACCES;
3188                 return check_map_access(env, regno, reg->off, access_size,
3189                                         zero_size_allowed);
3190         default: /* scalar_value|ptr_to_stack or invalid ptr */
3191                 return check_stack_boundary(env, regno, access_size,
3192                                             zero_size_allowed, meta);
3193         }
3194 }
3195
3196 /* Implementation details:
3197  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
3198  * Two bpf_map_lookups (even with the same key) will have different reg->id.
3199  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
3200  * value_or_null->value transition, since the verifier only cares about
3201  * the range of access to valid map value pointer and doesn't care about actual
3202  * address of the map element.
3203  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
3204  * reg->id > 0 after value_or_null->value transition. By doing so
3205  * two bpf_map_lookups will be considered two different pointers that
3206  * point to different bpf_spin_locks.
3207  * The verifier allows taking only one bpf_spin_lock at a time to avoid
3208  * dead-locks.
3209  * Since only one bpf_spin_lock is allowed the checks are simpler than
3210  * reg_is_refcounted() logic. The verifier needs to remember only
3211  * one spin_lock instead of array of acquired_refs.
3212  * cur_state->active_spin_lock remembers which map value element got locked
3213  * and clears it after bpf_spin_unlock.
3214  */
3215 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
3216                              bool is_lock)
3217 {
3218         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3219         struct bpf_verifier_state *cur = env->cur_state;
3220         bool is_const = tnum_is_const(reg->var_off);
3221         struct bpf_map *map = reg->map_ptr;
3222         u64 val = reg->var_off.value;
3223
3224         if (reg->type != PTR_TO_MAP_VALUE) {
3225                 verbose(env, "R%d is not a pointer to map_value\n", regno);
3226                 return -EINVAL;
3227         }
3228         if (!is_const) {
3229                 verbose(env,
3230                         "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
3231                         regno);
3232                 return -EINVAL;
3233         }
3234         if (!map->btf) {
3235                 verbose(env,
3236                         "map '%s' has to have BTF in order to use bpf_spin_lock\n",
3237                         map->name);
3238                 return -EINVAL;
3239         }
3240         if (!map_value_has_spin_lock(map)) {
3241                 if (map->spin_lock_off == -E2BIG)
3242                         verbose(env,
3243                                 "map '%s' has more than one 'struct bpf_spin_lock'\n",
3244                                 map->name);
3245                 else if (map->spin_lock_off == -ENOENT)
3246                         verbose(env,
3247                                 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
3248                                 map->name);
3249                 else
3250                         verbose(env,
3251                                 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
3252                                 map->name);
3253                 return -EINVAL;
3254         }
3255         if (map->spin_lock_off != val + reg->off) {
3256                 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
3257                         val + reg->off);
3258                 return -EINVAL;
3259         }
3260         if (is_lock) {
3261                 if (cur->active_spin_lock) {
3262                         verbose(env,
3263                                 "Locking two bpf_spin_locks are not allowed\n");
3264                         return -EINVAL;
3265                 }
3266                 cur->active_spin_lock = reg->id;
3267         } else {
3268                 if (!cur->active_spin_lock) {
3269                         verbose(env, "bpf_spin_unlock without taking a lock\n");
3270                         return -EINVAL;
3271                 }
3272                 if (cur->active_spin_lock != reg->id) {
3273                         verbose(env, "bpf_spin_unlock of different lock\n");
3274                         return -EINVAL;
3275                 }
3276                 cur->active_spin_lock = 0;
3277         }
3278         return 0;
3279 }
3280
3281 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
3282 {
3283         return type == ARG_PTR_TO_MEM ||
3284                type == ARG_PTR_TO_MEM_OR_NULL ||
3285                type == ARG_PTR_TO_UNINIT_MEM;
3286 }
3287
3288 static bool arg_type_is_mem_size(enum bpf_arg_type type)
3289 {
3290         return type == ARG_CONST_SIZE ||
3291                type == ARG_CONST_SIZE_OR_ZERO;
3292 }
3293
3294 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
3295 {
3296         return type == ARG_PTR_TO_INT ||
3297                type == ARG_PTR_TO_LONG;
3298 }
3299
3300 static int int_ptr_type_to_size(enum bpf_arg_type type)
3301 {
3302         if (type == ARG_PTR_TO_INT)
3303                 return sizeof(u32);
3304         else if (type == ARG_PTR_TO_LONG)
3305                 return sizeof(u64);
3306
3307         return -EINVAL;
3308 }
3309
3310 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
3311                           enum bpf_arg_type arg_type,
3312                           struct bpf_call_arg_meta *meta)
3313 {
3314         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3315         enum bpf_reg_type expected_type, type = reg->type;
3316         int err = 0;
3317
3318         if (arg_type == ARG_DONTCARE)
3319                 return 0;
3320
3321         err = check_reg_arg(env, regno, SRC_OP);
3322         if (err)
3323                 return err;
3324
3325         if (arg_type == ARG_ANYTHING) {
3326                 if (is_pointer_value(env, regno)) {
3327                         verbose(env, "R%d leaks addr into helper function\n",
3328                                 regno);
3329                         return -EACCES;
3330                 }
3331                 return 0;
3332         }
3333
3334         if (type_is_pkt_pointer(type) &&
3335             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
3336                 verbose(env, "helper access to the packet is not allowed\n");
3337                 return -EACCES;
3338         }
3339
3340         if (arg_type == ARG_PTR_TO_MAP_KEY ||
3341             arg_type == ARG_PTR_TO_MAP_VALUE ||
3342             arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
3343             arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
3344                 expected_type = PTR_TO_STACK;
3345                 if (register_is_null(reg) &&
3346                     arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
3347                         /* final test in check_stack_boundary() */;
3348                 else if (!type_is_pkt_pointer(type) &&
3349                          type != PTR_TO_MAP_VALUE &&
3350                          type != expected_type)
3351                         goto err_type;
3352         } else if (arg_type == ARG_CONST_SIZE ||
3353                    arg_type == ARG_CONST_SIZE_OR_ZERO) {
3354                 expected_type = SCALAR_VALUE;
3355                 if (type != expected_type)
3356                         goto err_type;
3357         } else if (arg_type == ARG_CONST_MAP_PTR) {
3358                 expected_type = CONST_PTR_TO_MAP;
3359                 if (type != expected_type)
3360                         goto err_type;
3361         } else if (arg_type == ARG_PTR_TO_CTX) {
3362                 expected_type = PTR_TO_CTX;
3363                 if (type != expected_type)
3364                         goto err_type;
3365                 err = check_ctx_reg(env, reg, regno);
3366                 if (err < 0)
3367                         return err;
3368         } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
3369                 expected_type = PTR_TO_SOCK_COMMON;
3370                 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
3371                 if (!type_is_sk_pointer(type))
3372                         goto err_type;
3373                 if (reg->ref_obj_id) {
3374                         if (meta->ref_obj_id) {
3375                                 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
3376                                         regno, reg->ref_obj_id,
3377                                         meta->ref_obj_id);
3378                                 return -EFAULT;
3379                         }
3380                         meta->ref_obj_id = reg->ref_obj_id;
3381                 }
3382         } else if (arg_type == ARG_PTR_TO_SOCKET) {
3383                 expected_type = PTR_TO_SOCKET;
3384                 if (type != expected_type)
3385                         goto err_type;
3386         } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
3387                 if (meta->func_id == BPF_FUNC_spin_lock) {
3388                         if (process_spin_lock(env, regno, true))
3389                                 return -EACCES;
3390                 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
3391                         if (process_spin_lock(env, regno, false))
3392                                 return -EACCES;
3393                 } else {
3394                         verbose(env, "verifier internal error\n");
3395                         return -EFAULT;
3396                 }
3397         } else if (arg_type_is_mem_ptr(arg_type)) {
3398                 expected_type = PTR_TO_STACK;
3399                 /* One exception here. In case function allows for NULL to be
3400                  * passed in as argument, it's a SCALAR_VALUE type. Final test
3401                  * happens during stack boundary checking.
3402                  */
3403                 if (register_is_null(reg) &&
3404                     arg_type == ARG_PTR_TO_MEM_OR_NULL)
3405                         /* final test in check_stack_boundary() */;
3406                 else if (!type_is_pkt_pointer(type) &&
3407                          type != PTR_TO_MAP_VALUE &&
3408                          type != expected_type)
3409                         goto err_type;
3410                 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
3411         } else if (arg_type_is_int_ptr(arg_type)) {
3412                 expected_type = PTR_TO_STACK;
3413                 if (!type_is_pkt_pointer(type) &&
3414                     type != PTR_TO_MAP_VALUE &&
3415                     type != expected_type)
3416                         goto err_type;
3417         } else {
3418                 verbose(env, "unsupported arg_type %d\n", arg_type);
3419                 return -EFAULT;
3420         }
3421
3422         if (arg_type == ARG_CONST_MAP_PTR) {
3423                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
3424                 meta->map_ptr = reg->map_ptr;
3425         } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
3426                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
3427                  * check that [key, key + map->key_size) are within
3428                  * stack limits and initialized
3429                  */
3430                 if (!meta->map_ptr) {
3431                         /* in function declaration map_ptr must come before
3432                          * map_key, so that it's verified and known before
3433                          * we have to check map_key here. Otherwise it means
3434                          * that kernel subsystem misconfigured verifier
3435                          */
3436                         verbose(env, "invalid map_ptr to access map->key\n");
3437                         return -EACCES;
3438                 }
3439                 err = check_helper_mem_access(env, regno,
3440                                               meta->map_ptr->key_size, false,
3441                                               NULL);
3442         } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
3443                    (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
3444                     !register_is_null(reg)) ||
3445                    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
3446                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
3447                  * check [value, value + map->value_size) validity
3448                  */
3449                 if (!meta->map_ptr) {
3450                         /* kernel subsystem misconfigured verifier */
3451                         verbose(env, "invalid map_ptr to access map->value\n");
3452                         return -EACCES;
3453                 }
3454                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
3455                 err = check_helper_mem_access(env, regno,
3456                                               meta->map_ptr->value_size, false,
3457                                               meta);
3458         } else if (arg_type_is_mem_size(arg_type)) {
3459                 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
3460
3461                 /* remember the mem_size which may be used later
3462                  * to refine return values.
3463                  */
3464                 meta->msize_max_value = reg->umax_value;
3465
3466                 /* The register is SCALAR_VALUE; the access check
3467                  * happens using its boundaries.
3468                  */
3469                 if (!tnum_is_const(reg->var_off))
3470                         /* For unprivileged variable accesses, disable raw
3471                          * mode so that the program is required to
3472                          * initialize all the memory that the helper could
3473                          * just partially fill up.
3474                          */
3475                         meta = NULL;
3476
3477                 if (reg->smin_value < 0) {
3478                         verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
3479                                 regno);
3480                         return -EACCES;
3481                 }
3482
3483                 if (reg->umin_value == 0) {
3484                         err = check_helper_mem_access(env, regno - 1, 0,
3485                                                       zero_size_allowed,
3486                                                       meta);
3487                         if (err)
3488                                 return err;
3489                 }
3490
3491                 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
3492                         verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
3493                                 regno);
3494                         return -EACCES;
3495                 }
3496                 err = check_helper_mem_access(env, regno - 1,
3497                                               reg->umax_value,
3498                                               zero_size_allowed, meta);
3499                 if (!err)
3500                         err = mark_chain_precision(env, regno);
3501         } else if (arg_type_is_int_ptr(arg_type)) {
3502                 int size = int_ptr_type_to_size(arg_type);
3503
3504                 err = check_helper_mem_access(env, regno, size, false, meta);
3505                 if (err)
3506                         return err;
3507                 err = check_ptr_alignment(env, reg, 0, size, true);
3508         }
3509
3510         return err;
3511 err_type:
3512         verbose(env, "R%d type=%s expected=%s\n", regno,
3513                 reg_type_str[type], reg_type_str[expected_type]);
3514         return -EACCES;
3515 }
3516
3517 static int check_map_func_compatibility(struct bpf_verifier_env *env,
3518                                         struct bpf_map *map, int func_id)
3519 {
3520         if (!map)
3521                 return 0;
3522
3523         /* We need a two way check, first is from map perspective ... */
3524         switch (map->map_type) {
3525         case BPF_MAP_TYPE_PROG_ARRAY:
3526                 if (func_id != BPF_FUNC_tail_call)
3527                         goto error;
3528                 break;
3529         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
3530                 if (func_id != BPF_FUNC_perf_event_read &&
3531                     func_id != BPF_FUNC_perf_event_output &&
3532                     func_id != BPF_FUNC_perf_event_read_value)
3533                         goto error;
3534                 break;
3535         case BPF_MAP_TYPE_STACK_TRACE:
3536                 if (func_id != BPF_FUNC_get_stackid)
3537                         goto error;
3538                 break;
3539         case BPF_MAP_TYPE_CGROUP_ARRAY:
3540                 if (func_id != BPF_FUNC_skb_under_cgroup &&
3541                     func_id != BPF_FUNC_current_task_under_cgroup)
3542                         goto error;
3543                 break;
3544         case BPF_MAP_TYPE_CGROUP_STORAGE:
3545         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
3546                 if (func_id != BPF_FUNC_get_local_storage)
3547                         goto error;
3548                 break;
3549         case BPF_MAP_TYPE_DEVMAP:
3550         case BPF_MAP_TYPE_DEVMAP_HASH:
3551                 if (func_id != BPF_FUNC_redirect_map &&
3552                     func_id != BPF_FUNC_map_lookup_elem)
3553                         goto error;
3554                 break;
3555         /* Restrict bpf side of cpumap and xskmap, open when use-cases
3556          * appear.
3557          */
3558         case BPF_MAP_TYPE_CPUMAP:
3559                 if (func_id != BPF_FUNC_redirect_map)
3560                         goto error;
3561                 break;
3562         case BPF_MAP_TYPE_XSKMAP:
3563                 if (func_id != BPF_FUNC_redirect_map &&
3564                     func_id != BPF_FUNC_map_lookup_elem)
3565                         goto error;
3566                 break;
3567         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
3568         case BPF_MAP_TYPE_HASH_OF_MAPS:
3569                 if (func_id != BPF_FUNC_map_lookup_elem)
3570                         goto error;
3571                 break;
3572         case BPF_MAP_TYPE_SOCKMAP:
3573                 if (func_id != BPF_FUNC_sk_redirect_map &&
3574                     func_id != BPF_FUNC_sock_map_update &&
3575                     func_id != BPF_FUNC_map_delete_elem &&
3576                     func_id != BPF_FUNC_msg_redirect_map)
3577                         goto error;
3578                 break;
3579         case BPF_MAP_TYPE_SOCKHASH:
3580                 if (func_id != BPF_FUNC_sk_redirect_hash &&
3581                     func_id != BPF_FUNC_sock_hash_update &&
3582                     func_id != BPF_FUNC_map_delete_elem &&
3583                     func_id != BPF_FUNC_msg_redirect_hash)
3584                         goto error;
3585                 break;
3586         case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
3587                 if (func_id != BPF_FUNC_sk_select_reuseport)
3588                         goto error;
3589                 break;
3590         case BPF_MAP_TYPE_QUEUE:
3591         case BPF_MAP_TYPE_STACK:
3592                 if (func_id != BPF_FUNC_map_peek_elem &&
3593                     func_id != BPF_FUNC_map_pop_elem &&
3594                     func_id != BPF_FUNC_map_push_elem)
3595                         goto error;
3596                 break;
3597         case BPF_MAP_TYPE_SK_STORAGE:
3598                 if (func_id != BPF_FUNC_sk_storage_get &&
3599                     func_id != BPF_FUNC_sk_storage_delete)
3600                         goto error;
3601                 break;
3602         default:
3603                 break;
3604         }
3605
3606         /* ... and second from the function itself. */
3607         switch (func_id) {
3608         case BPF_FUNC_tail_call:
3609                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
3610                         goto error;
3611                 if (env->subprog_cnt > 1) {
3612                         verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
3613                         return -EINVAL;
3614                 }
3615                 break;
3616         case BPF_FUNC_perf_event_read:
3617         case BPF_FUNC_perf_event_output:
3618         case BPF_FUNC_perf_event_read_value:
3619                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
3620                         goto error;
3621                 break;
3622         case BPF_FUNC_get_stackid:
3623                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
3624                         goto error;
3625                 break;
3626         case BPF_FUNC_current_task_under_cgroup:
3627         case BPF_FUNC_skb_under_cgroup:
3628                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
3629                         goto error;
3630                 break;
3631         case BPF_FUNC_redirect_map:
3632                 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
3633                     map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
3634                     map->map_type != BPF_MAP_TYPE_CPUMAP &&
3635                     map->map_type != BPF_MAP_TYPE_XSKMAP)
3636                         goto error;
3637                 break;
3638         case BPF_FUNC_sk_redirect_map:
3639         case BPF_FUNC_msg_redirect_map:
3640         case BPF_FUNC_sock_map_update:
3641                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
3642                         goto error;
3643                 break;
3644         case BPF_FUNC_sk_redirect_hash:
3645         case BPF_FUNC_msg_redirect_hash:
3646         case BPF_FUNC_sock_hash_update:
3647                 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
3648                         goto error;
3649                 break;
3650         case BPF_FUNC_get_local_storage:
3651                 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
3652                     map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
3653                         goto error;
3654                 break;
3655         case BPF_FUNC_sk_select_reuseport:
3656                 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
3657                         goto error;
3658                 break;
3659         case BPF_FUNC_map_peek_elem:
3660         case BPF_FUNC_map_pop_elem:
3661         case BPF_FUNC_map_push_elem:
3662                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
3663                     map->map_type != BPF_MAP_TYPE_STACK)
3664                         goto error;
3665                 break;
3666         case BPF_FUNC_sk_storage_get:
3667         case BPF_FUNC_sk_storage_delete:
3668                 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
3669                         goto error;
3670                 break;
3671         default:
3672                 break;
3673         }
3674
3675         return 0;
3676 error:
3677         verbose(env, "cannot pass map_type %d into func %s#%d\n",
3678                 map->map_type, func_id_name(func_id), func_id);
3679         return -EINVAL;
3680 }
3681
3682 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
3683 {
3684         int count = 0;
3685
3686         if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
3687                 count++;
3688         if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
3689                 count++;
3690         if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
3691                 count++;
3692         if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
3693                 count++;
3694         if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
3695                 count++;
3696
3697         /* We only support one arg being in raw mode at the moment,
3698          * which is sufficient for the helper functions we have
3699          * right now.
3700          */
3701         return count <= 1;
3702 }
3703
3704 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
3705                                     enum bpf_arg_type arg_next)
3706 {
3707         return (arg_type_is_mem_ptr(arg_curr) &&
3708                 !arg_type_is_mem_size(arg_next)) ||
3709                (!arg_type_is_mem_ptr(arg_curr) &&
3710                 arg_type_is_mem_size(arg_next));
3711 }
3712
3713 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
3714 {
3715         /* bpf_xxx(..., buf, len) call will access 'len'
3716          * bytes from memory 'buf'. Both arg types need
3717          * to be paired, so make sure there's no buggy
3718          * helper function specification.
3719          */
3720         if (arg_type_is_mem_size(fn->arg1_type) ||
3721             arg_type_is_mem_ptr(fn->arg5_type)  ||
3722             check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
3723             check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
3724             check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
3725             check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
3726                 return false;
3727
3728         return true;
3729 }
3730
3731 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
3732 {
3733         int count = 0;
3734
3735         if (arg_type_may_be_refcounted(fn->arg1_type))
3736                 count++;
3737         if (arg_type_may_be_refcounted(fn->arg2_type))
3738                 count++;
3739         if (arg_type_may_be_refcounted(fn->arg3_type))
3740                 count++;
3741         if (arg_type_may_be_refcounted(fn->arg4_type))
3742                 count++;
3743         if (arg_type_may_be_refcounted(fn->arg5_type))
3744                 count++;
3745
3746         /* A reference acquiring function cannot acquire
3747          * another refcounted ptr.
3748          */
3749         if (is_acquire_function(func_id) && count)
3750                 return false;
3751
3752         /* We only support one arg being unreferenced at the moment,
3753          * which is sufficient for the helper functions we have right now.
3754          */
3755         return count <= 1;
3756 }
3757
3758 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
3759 {
3760         return check_raw_mode_ok(fn) &&
3761                check_arg_pair_ok(fn) &&
3762                check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
3763 }
3764
3765 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
3766  * are now invalid, so turn them into unknown SCALAR_VALUE.
3767  */
3768 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
3769                                      struct bpf_func_state *state)
3770 {
3771         struct bpf_reg_state *regs = state->regs, *reg;
3772         int i;
3773
3774         for (i = 0; i < MAX_BPF_REG; i++)
3775                 if (reg_is_pkt_pointer_any(&regs[i]))
3776                         mark_reg_unknown(env, regs, i);
3777
3778         bpf_for_each_spilled_reg(i, state, reg) {
3779                 if (!reg)
3780                         continue;
3781                 if (reg_is_pkt_pointer_any(reg))
3782                         __mark_reg_unknown(env, reg);
3783         }
3784 }
3785
3786 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
3787 {
3788         struct bpf_verifier_state *vstate = env->cur_state;
3789         int i;
3790
3791         for (i = 0; i <= vstate->curframe; i++)
3792                 __clear_all_pkt_pointers(env, vstate->frame[i]);
3793 }
3794
3795 static void release_reg_references(struct bpf_verifier_env *env,
3796                                    struct bpf_func_state *state,
3797                                    int ref_obj_id)
3798 {
3799         struct bpf_reg_state *regs = state->regs, *reg;
3800         int i;
3801
3802         for (i = 0; i < MAX_BPF_REG; i++)
3803                 if (regs[i].ref_obj_id == ref_obj_id)
3804                         mark_reg_unknown(env, regs, i);
3805
3806         bpf_for_each_spilled_reg(i, state, reg) {
3807                 if (!reg)
3808                         continue;
3809                 if (reg->ref_obj_id == ref_obj_id)
3810                         __mark_reg_unknown(env, reg);
3811         }
3812 }
3813
3814 /* The pointer with the specified id has released its reference to kernel
3815  * resources. Identify all copies of the same pointer and clear the reference.
3816  */
3817 static int release_reference(struct bpf_verifier_env *env,
3818                              int ref_obj_id)
3819 {
3820         struct bpf_verifier_state *vstate = env->cur_state;
3821         int err;
3822         int i;
3823
3824         err = release_reference_state(cur_func(env), ref_obj_id);
3825         if (err)
3826                 return err;
3827
3828         for (i = 0; i <= vstate->curframe; i++)
3829                 release_reg_references(env, vstate->frame[i], ref_obj_id);
3830
3831         return 0;
3832 }
3833
3834 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
3835                            int *insn_idx)
3836 {
3837         struct bpf_verifier_state *state = env->cur_state;
3838         struct bpf_func_state *caller, *callee;
3839         int i, err, subprog, target_insn;
3840
3841         if (state->curframe + 1 >= MAX_CALL_FRAMES) {
3842                 verbose(env, "the call stack of %d frames is too deep\n",
3843                         state->curframe + 2);
3844                 return -E2BIG;
3845         }
3846
3847         target_insn = *insn_idx + insn->imm;
3848         subprog = find_subprog(env, target_insn + 1);
3849         if (subprog < 0) {
3850                 verbose(env, "verifier bug. No program starts at insn %d\n",
3851                         target_insn + 1);
3852                 return -EFAULT;
3853         }
3854
3855         caller = state->frame[state->curframe];
3856         if (state->frame[state->curframe + 1]) {
3857                 verbose(env, "verifier bug. Frame %d already allocated\n",
3858                         state->curframe + 1);
3859                 return -EFAULT;
3860         }
3861
3862         callee = kzalloc(sizeof(*callee), GFP_KERNEL);
3863         if (!callee)
3864                 return -ENOMEM;
3865         state->frame[state->curframe + 1] = callee;
3866
3867         /* callee cannot access r0, r6 - r9 for reading and has to write
3868          * into its own stack before reading from it.
3869          * callee can read/write into caller's stack
3870          */
3871         init_func_state(env, callee,
3872                         /* remember the callsite, it will be used by bpf_exit */
3873                         *insn_idx /* callsite */,
3874                         state->curframe + 1 /* frameno within this callchain */,
3875                         subprog /* subprog number within this prog */);
3876
3877         /* Transfer references to the callee */
3878         err = transfer_reference_state(callee, caller);
3879         if (err)
3880                 return err;
3881
3882         /* copy r1 - r5 args that callee can access.  The copy includes parent
3883          * pointers, which connects us up to the liveness chain
3884          */
3885         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3886                 callee->regs[i] = caller->regs[i];
3887
3888         /* after the call registers r0 - r5 were scratched */
3889         for (i = 0; i < CALLER_SAVED_REGS; i++) {
3890                 mark_reg_not_init(env, caller->regs, caller_saved[i]);
3891                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3892         }
3893
3894         /* only increment it after check_reg_arg() finished */
3895         state->curframe++;
3896
3897         /* and go analyze first insn of the callee */
3898         *insn_idx = target_insn;
3899
3900         if (env->log.level & BPF_LOG_LEVEL) {
3901                 verbose(env, "caller:\n");
3902                 print_verifier_state(env, caller);
3903                 verbose(env, "callee:\n");
3904                 print_verifier_state(env, callee);
3905         }
3906         return 0;
3907 }
3908
3909 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
3910 {
3911         struct bpf_verifier_state *state = env->cur_state;
3912         struct bpf_func_state *caller, *callee;
3913         struct bpf_reg_state *r0;
3914         int err;
3915
3916         callee = state->frame[state->curframe];
3917         r0 = &callee->regs[BPF_REG_0];
3918         if (r0->type == PTR_TO_STACK) {
3919                 /* technically it's ok to return caller's stack pointer
3920                  * (or caller's caller's pointer) back to the caller,
3921                  * since these pointers are valid. Only current stack
3922                  * pointer will be invalid as soon as function exits,
3923                  * but let's be conservative
3924                  */
3925                 verbose(env, "cannot return stack pointer to the caller\n");
3926                 return -EINVAL;
3927         }
3928
3929         state->curframe--;
3930         caller = state->frame[state->curframe];
3931         /* return to the caller whatever r0 had in the callee */
3932         caller->regs[BPF_REG_0] = *r0;
3933
3934         /* Transfer references to the caller */
3935         err = transfer_reference_state(caller, callee);
3936         if (err)
3937                 return err;
3938
3939         *insn_idx = callee->callsite + 1;
3940         if (env->log.level & BPF_LOG_LEVEL) {
3941                 verbose(env, "returning from callee:\n");
3942                 print_verifier_state(env, callee);
3943                 verbose(env, "to caller at %d:\n", *insn_idx);
3944                 print_verifier_state(env, caller);
3945         }
3946         /* clear everything in the callee */
3947         free_func_state(callee);
3948         state->frame[state->curframe + 1] = NULL;
3949         return 0;
3950 }
3951
3952 static int do_refine_retval_range(struct bpf_verifier_env *env,
3953                                   struct bpf_reg_state *regs, int ret_type,
3954                                   int func_id, struct bpf_call_arg_meta *meta)
3955 {
3956         struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
3957         struct bpf_reg_state tmp_reg = *ret_reg;
3958         bool ret;
3959
3960         if (ret_type != RET_INTEGER ||
3961             (func_id != BPF_FUNC_get_stack &&
3962              func_id != BPF_FUNC_probe_read_str))
3963                 return 0;
3964
3965         /* Error case where ret is in interval [S32MIN, -1]. */
3966         ret_reg->smin_value = S32_MIN;
3967         ret_reg->smax_value = -1;
3968
3969         __reg_deduce_bounds(ret_reg);
3970         __reg_bound_offset(ret_reg);
3971         __update_reg_bounds(ret_reg);
3972
3973         ret = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
3974         if (!ret)
3975                 return -EFAULT;
3976
3977         *ret_reg = tmp_reg;
3978
3979         /* Success case where ret is in range [0, msize_max_value]. */
3980         ret_reg->smin_value = 0;
3981         ret_reg->smax_value = meta->msize_max_value;
3982         ret_reg->umin_value = ret_reg->smin_value;
3983         ret_reg->umax_value = ret_reg->smax_value;
3984
3985         __reg_deduce_bounds(ret_reg);
3986         __reg_bound_offset(ret_reg);
3987         __update_reg_bounds(ret_reg);
3988
3989         return 0;
3990 }
3991
3992 static int
3993 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
3994                 int func_id, int insn_idx)
3995 {
3996         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
3997         struct bpf_map *map = meta->map_ptr;
3998
3999         if (func_id != BPF_FUNC_tail_call &&
4000             func_id != BPF_FUNC_map_lookup_elem &&
4001             func_id != BPF_FUNC_map_update_elem &&
4002             func_id != BPF_FUNC_map_delete_elem &&
4003             func_id != BPF_FUNC_map_push_elem &&
4004             func_id != BPF_FUNC_map_pop_elem &&
4005             func_id != BPF_FUNC_map_peek_elem)
4006                 return 0;
4007
4008         if (map == NULL) {
4009                 verbose(env, "kernel subsystem misconfigured verifier\n");
4010                 return -EINVAL;
4011         }
4012
4013         /* In case of read-only, some additional restrictions
4014          * need to be applied in order to prevent altering the
4015          * state of the map from program side.
4016          */
4017         if ((map->map_flags & BPF_F_RDONLY_PROG) &&
4018             (func_id == BPF_FUNC_map_delete_elem ||
4019              func_id == BPF_FUNC_map_update_elem ||
4020              func_id == BPF_FUNC_map_push_elem ||
4021              func_id == BPF_FUNC_map_pop_elem)) {
4022                 verbose(env, "write into map forbidden\n");
4023                 return -EACCES;
4024         }
4025
4026         if (!BPF_MAP_PTR(aux->map_state))
4027                 bpf_map_ptr_store(aux, meta->map_ptr,
4028                                   meta->map_ptr->unpriv_array);
4029         else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
4030                 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
4031                                   meta->map_ptr->unpriv_array);
4032         return 0;
4033 }
4034
4035 static int check_reference_leak(struct bpf_verifier_env *env)
4036 {
4037         struct bpf_func_state *state = cur_func(env);
4038         int i;
4039
4040         for (i = 0; i < state->acquired_refs; i++) {
4041                 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
4042                         state->refs[i].id, state->refs[i].insn_idx);
4043         }
4044         return state->acquired_refs ? -EINVAL : 0;
4045 }
4046
4047 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
4048 {
4049         const struct bpf_func_proto *fn = NULL;
4050         struct bpf_reg_state *regs;
4051         struct bpf_call_arg_meta meta;
4052         bool changes_data;
4053         int i, err;
4054
4055         /* find function prototype */
4056         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
4057                 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
4058                         func_id);
4059                 return -EINVAL;
4060         }
4061
4062         if (env->ops->get_func_proto)
4063                 fn = env->ops->get_func_proto(func_id, env->prog);
4064         if (!fn) {
4065                 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
4066                         func_id);
4067                 return -EINVAL;
4068         }
4069
4070         /* eBPF programs must be GPL compatible to use GPL-ed functions */
4071         if (!env->prog->gpl_compatible && fn->gpl_only) {
4072                 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
4073                 return -EINVAL;
4074         }
4075
4076         /* With LD_ABS/IND some JITs save/restore skb from r1. */
4077         changes_data = bpf_helper_changes_pkt_data(fn->func);
4078         if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
4079                 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
4080                         func_id_name(func_id), func_id);
4081                 return -EINVAL;
4082         }
4083
4084         memset(&meta, 0, sizeof(meta));
4085         meta.pkt_access = fn->pkt_access;
4086
4087         err = check_func_proto(fn, func_id);
4088         if (err) {
4089                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
4090                         func_id_name(func_id), func_id);
4091                 return err;
4092         }
4093
4094         meta.func_id = func_id;
4095         /* check args */
4096         err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
4097         if (err)
4098                 return err;
4099         err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
4100         if (err)
4101                 return err;
4102         err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
4103         if (err)
4104                 return err;
4105         err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
4106         if (err)
4107                 return err;
4108         err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
4109         if (err)
4110                 return err;
4111
4112         err = record_func_map(env, &meta, func_id, insn_idx);
4113         if (err)
4114                 return err;
4115
4116         /* Mark slots with STACK_MISC in case of raw mode, stack offset
4117          * is inferred from register state.
4118          */
4119         for (i = 0; i < meta.access_size; i++) {
4120                 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
4121                                        BPF_WRITE, -1, false);
4122                 if (err)
4123                         return err;
4124         }
4125
4126         if (func_id == BPF_FUNC_tail_call) {
4127                 err = check_reference_leak(env);
4128                 if (err) {
4129                         verbose(env, "tail_call would lead to reference leak\n");
4130                         return err;
4131                 }
4132         } else if (is_release_function(func_id)) {
4133                 err = release_reference(env, meta.ref_obj_id);
4134                 if (err) {
4135                         verbose(env, "func %s#%d reference has not been acquired before\n",
4136                                 func_id_name(func_id), func_id);
4137                         return err;
4138                 }
4139         }
4140
4141         regs = cur_regs(env);
4142
4143         /* check that flags argument in get_local_storage(map, flags) is 0,
4144          * this is required because get_local_storage() can't return an error.
4145          */
4146         if (func_id == BPF_FUNC_get_local_storage &&
4147             !register_is_null(&regs[BPF_REG_2])) {
4148                 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
4149                 return -EINVAL;
4150         }
4151
4152         /* reset caller saved regs */
4153         for (i = 0; i < CALLER_SAVED_REGS; i++) {
4154                 mark_reg_not_init(env, regs, caller_saved[i]);
4155                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
4156         }
4157
4158         /* helper call returns 64-bit value. */
4159         regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
4160
4161         /* update return register (already marked as written above) */
4162         if (fn->ret_type == RET_INTEGER) {
4163                 /* sets type to SCALAR_VALUE */
4164                 mark_reg_unknown(env, regs, BPF_REG_0);
4165         } else if (fn->ret_type == RET_VOID) {
4166                 regs[BPF_REG_0].type = NOT_INIT;
4167         } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
4168                    fn->ret_type == RET_PTR_TO_MAP_VALUE) {
4169                 /* There is no offset yet applied, variable or fixed */
4170                 mark_reg_known_zero(env, regs, BPF_REG_0);
4171                 /* remember map_ptr, so that check_map_access()
4172                  * can check 'value_size' boundary of memory access
4173                  * to map element returned from bpf_map_lookup_elem()
4174                  */
4175                 if (meta.map_ptr == NULL) {
4176                         verbose(env,
4177                                 "kernel subsystem misconfigured verifier\n");
4178                         return -EINVAL;
4179                 }
4180                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
4181                 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
4182                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
4183                         if (map_value_has_spin_lock(meta.map_ptr))
4184                                 regs[BPF_REG_0].id = ++env->id_gen;
4185                 } else {
4186                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
4187                         regs[BPF_REG_0].id = ++env->id_gen;
4188                 }
4189         } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
4190                 mark_reg_known_zero(env, regs, BPF_REG_0);
4191                 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
4192                 regs[BPF_REG_0].id = ++env->id_gen;
4193         } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
4194                 mark_reg_known_zero(env, regs, BPF_REG_0);
4195                 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
4196                 regs[BPF_REG_0].id = ++env->id_gen;
4197         } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
4198                 mark_reg_known_zero(env, regs, BPF_REG_0);
4199                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
4200                 regs[BPF_REG_0].id = ++env->id_gen;
4201         } else {
4202                 verbose(env, "unknown return type %d of func %s#%d\n",
4203                         fn->ret_type, func_id_name(func_id), func_id);
4204                 return -EINVAL;
4205         }
4206
4207         if (is_ptr_cast_function(func_id)) {
4208                 /* For release_reference() */
4209                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
4210         } else if (is_acquire_function(func_id)) {
4211                 int id = acquire_reference_state(env, insn_idx);
4212
4213                 if (id < 0)
4214                         return id;
4215                 /* For mark_ptr_or_null_reg() */
4216                 regs[BPF_REG_0].id = id;
4217                 /* For release_reference() */
4218                 regs[BPF_REG_0].ref_obj_id = id;
4219         }
4220
4221         err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
4222         if (err)
4223                 return err;
4224
4225         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
4226         if (err)
4227                 return err;
4228
4229         if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
4230                 const char *err_str;
4231
4232 #ifdef CONFIG_PERF_EVENTS
4233                 err = get_callchain_buffers(sysctl_perf_event_max_stack);
4234                 err_str = "cannot get callchain buffer for func %s#%d\n";
4235 #else
4236                 err = -ENOTSUPP;
4237                 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
4238 #endif
4239                 if (err) {
4240                         verbose(env, err_str, func_id_name(func_id), func_id);
4241                         return err;
4242                 }
4243
4244                 env->prog->has_callchain_buf = true;
4245         }
4246
4247         if (changes_data)
4248                 clear_all_pkt_pointers(env);
4249         return 0;
4250 }
4251
4252 static bool signed_add_overflows(s64 a, s64 b)
4253 {
4254         /* Do the add in u64, where overflow is well-defined */
4255         s64 res = (s64)((u64)a + (u64)b);
4256
4257         if (b < 0)
4258                 return res > a;
4259         return res < a;
4260 }
4261
4262 static bool signed_sub_overflows(s64 a, s64 b)
4263 {
4264         /* Do the sub in u64, where overflow is well-defined */
4265         s64 res = (s64)((u64)a - (u64)b);
4266
4267         if (b < 0)
4268                 return res < a;
4269         return res > a;
4270 }
4271
4272 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
4273                                   const struct bpf_reg_state *reg,
4274                                   enum bpf_reg_type type)
4275 {
4276         bool known = tnum_is_const(reg->var_off);
4277         s64 val = reg->var_off.value;
4278         s64 smin = reg->smin_value;
4279
4280         if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
4281                 verbose(env, "math between %s pointer and %lld is not allowed\n",
4282                         reg_type_str[type], val);
4283                 return false;
4284         }
4285
4286         if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
4287                 verbose(env, "%s pointer offset %d is not allowed\n",
4288                         reg_type_str[type], reg->off);
4289                 return false;
4290         }
4291
4292         if (smin == S64_MIN) {
4293                 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
4294                         reg_type_str[type]);
4295                 return false;
4296         }
4297
4298         if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
4299                 verbose(env, "value %lld makes %s pointer be out of bounds\n",
4300                         smin, reg_type_str[type]);
4301                 return false;
4302         }
4303
4304         return true;
4305 }
4306
4307 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
4308 {
4309         return &env->insn_aux_data[env->insn_idx];
4310 }
4311
4312 enum {
4313         REASON_BOUNDS   = -1,
4314         REASON_TYPE     = -2,
4315         REASON_PATHS    = -3,
4316         REASON_LIMIT    = -4,
4317         REASON_STACK    = -5,
4318 };
4319
4320 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
4321                               u32 *alu_limit, bool mask_to_left)
4322 {
4323         u32 max = 0, ptr_limit = 0;
4324
4325         switch (ptr_reg->type) {
4326         case PTR_TO_STACK:
4327                 /* Offset 0 is out-of-bounds, but acceptable start for the
4328                  * left direction, see BPF_REG_FP. Also, unknown scalar
4329                  * offset where we would need to deal with min/max bounds is
4330                  * currently prohibited for unprivileged.
4331                  */
4332                 max = MAX_BPF_STACK + mask_to_left;
4333                 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
4334                 break;
4335         case PTR_TO_MAP_VALUE:
4336                 max = ptr_reg->map_ptr->value_size;
4337                 ptr_limit = (mask_to_left ?
4338                              ptr_reg->smin_value :
4339                              ptr_reg->umax_value) + ptr_reg->off;
4340                 break;
4341         default:
4342                 return REASON_TYPE;
4343         }
4344
4345         if (ptr_limit >= max)
4346                 return REASON_LIMIT;
4347         *alu_limit = ptr_limit;
4348         return 0;
4349 }
4350
4351 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
4352                                     const struct bpf_insn *insn)
4353 {
4354         return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
4355 }
4356
4357 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
4358                                        u32 alu_state, u32 alu_limit)
4359 {
4360         /* If we arrived here from different branches with different
4361          * state or limits to sanitize, then this won't work.
4362          */
4363         if (aux->alu_state &&
4364             (aux->alu_state != alu_state ||
4365              aux->alu_limit != alu_limit))
4366                 return REASON_PATHS;
4367
4368         /* Corresponding fixup done in fixup_bpf_calls(). */
4369         aux->alu_state = alu_state;
4370         aux->alu_limit = alu_limit;
4371         return 0;
4372 }
4373
4374 static int sanitize_val_alu(struct bpf_verifier_env *env,
4375                             struct bpf_insn *insn)
4376 {
4377         struct bpf_insn_aux_data *aux = cur_aux(env);
4378
4379         if (can_skip_alu_sanitation(env, insn))
4380                 return 0;
4381
4382         return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
4383 }
4384
4385 static bool sanitize_needed(u8 opcode)
4386 {
4387         return opcode == BPF_ADD || opcode == BPF_SUB;
4388 }
4389
4390 struct bpf_sanitize_info {
4391         struct bpf_insn_aux_data aux;
4392         bool mask_to_left;
4393 };
4394
4395 static struct bpf_verifier_state *
4396 sanitize_speculative_path(struct bpf_verifier_env *env,
4397                           const struct bpf_insn *insn,
4398                           u32 next_idx, u32 curr_idx)
4399 {
4400         struct bpf_verifier_state *branch;
4401         struct bpf_reg_state *regs;
4402
4403         branch = push_stack(env, next_idx, curr_idx, true);
4404         if (branch && insn) {
4405                 regs = branch->frame[branch->curframe]->regs;
4406                 if (BPF_SRC(insn->code) == BPF_K) {
4407                         mark_reg_unknown(env, regs, insn->dst_reg);
4408                 } else if (BPF_SRC(insn->code) == BPF_X) {
4409                         mark_reg_unknown(env, regs, insn->dst_reg);
4410                         mark_reg_unknown(env, regs, insn->src_reg);
4411                 }
4412         }
4413         return branch;
4414 }
4415
4416 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
4417                             struct bpf_insn *insn,
4418                             const struct bpf_reg_state *ptr_reg,
4419                             const struct bpf_reg_state *off_reg,
4420                             struct bpf_reg_state *dst_reg,
4421                             struct bpf_sanitize_info *info,
4422                             const bool commit_window)
4423 {
4424         struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
4425         struct bpf_verifier_state *vstate = env->cur_state;
4426         bool off_is_imm = tnum_is_const(off_reg->var_off);
4427         bool off_is_neg = off_reg->smin_value < 0;
4428         bool ptr_is_dst_reg = ptr_reg == dst_reg;
4429         u8 opcode = BPF_OP(insn->code);
4430         u32 alu_state, alu_limit;
4431         struct bpf_reg_state tmp;
4432         bool ret;
4433         int err;
4434
4435         if (can_skip_alu_sanitation(env, insn))
4436                 return 0;
4437
4438         /* We already marked aux for masking from non-speculative
4439          * paths, thus we got here in the first place. We only care
4440          * to explore bad access from here.
4441          */
4442         if (vstate->speculative)
4443                 goto do_sim;
4444
4445         if (!commit_window) {
4446                 if (!tnum_is_const(off_reg->var_off) &&
4447                     (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
4448                         return REASON_BOUNDS;
4449
4450                 info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
4451                                      (opcode == BPF_SUB && !off_is_neg);
4452         }
4453
4454         err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
4455         if (err < 0)
4456                 return err;
4457
4458         if (commit_window) {
4459                 /* In commit phase we narrow the masking window based on
4460                  * the observed pointer move after the simulated operation.
4461                  */
4462                 alu_state = info->aux.alu_state;
4463                 alu_limit = abs(info->aux.alu_limit - alu_limit);
4464         } else {
4465                 alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
4466                 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
4467                 alu_state |= ptr_is_dst_reg ?
4468                              BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
4469
4470                 /* Limit pruning on unknown scalars to enable deep search for
4471                  * potential masking differences from other program paths.
4472                  */
4473                 if (!off_is_imm)
4474                         env->explore_alu_limits = true;
4475         }
4476
4477         err = update_alu_sanitation_state(aux, alu_state, alu_limit);
4478         if (err < 0)
4479                 return err;
4480 do_sim:
4481         /* If we're in commit phase, we're done here given we already
4482          * pushed the truncated dst_reg into the speculative verification
4483          * stack.
4484          *
4485          * Also, when register is a known constant, we rewrite register-based
4486          * operation to immediate-based, and thus do not need masking (and as
4487          * a consequence, do not need to simulate the zero-truncation either).
4488          */
4489         if (commit_window || off_is_imm)
4490                 return 0;
4491
4492         /* Simulate and find potential out-of-bounds access under
4493          * speculative execution from truncation as a result of
4494          * masking when off was not within expected range. If off
4495          * sits in dst, then we temporarily need to move ptr there
4496          * to simulate dst (== 0) +/-= ptr. Needed, for example,
4497          * for cases where we use K-based arithmetic in one direction
4498          * and truncated reg-based in the other in order to explore
4499          * bad access.
4500          */
4501         if (!ptr_is_dst_reg) {
4502                 tmp = *dst_reg;
4503                 *dst_reg = *ptr_reg;
4504         }
4505         ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
4506                                         env->insn_idx);
4507         if (!ptr_is_dst_reg && ret)
4508                 *dst_reg = tmp;
4509         return !ret ? REASON_STACK : 0;
4510 }
4511
4512 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
4513 {
4514         struct bpf_verifier_state *vstate = env->cur_state;
4515
4516         /* If we simulate paths under speculation, we don't update the
4517          * insn as 'seen' such that when we verify unreachable paths in
4518          * the non-speculative domain, sanitize_dead_code() can still
4519          * rewrite/sanitize them.
4520          */
4521         if (!vstate->speculative)
4522                 env->insn_aux_data[env->insn_idx].seen = true;
4523 }
4524
4525 static int sanitize_err(struct bpf_verifier_env *env,
4526                         const struct bpf_insn *insn, int reason,
4527                         const struct bpf_reg_state *off_reg,
4528                         const struct bpf_reg_state *dst_reg)
4529 {
4530         static const char *err = "pointer arithmetic with it prohibited for !root";
4531         const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
4532         u32 dst = insn->dst_reg, src = insn->src_reg;
4533
4534         switch (reason) {
4535         case REASON_BOUNDS:
4536                 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
4537                         off_reg == dst_reg ? dst : src, err);
4538                 break;
4539         case REASON_TYPE:
4540                 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
4541                         off_reg == dst_reg ? src : dst, err);
4542                 break;
4543         case REASON_PATHS:
4544                 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
4545                         dst, op, err);
4546                 break;
4547         case REASON_LIMIT:
4548                 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
4549                         dst, op, err);
4550                 break;
4551         case REASON_STACK:
4552                 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
4553                         dst, err);
4554                 break;
4555         default:
4556                 verbose(env, "verifier internal error: unknown reason (%d)\n",
4557                         reason);
4558                 break;
4559         }
4560
4561         return -EACCES;
4562 }
4563
4564 static int sanitize_check_bounds(struct bpf_verifier_env *env,
4565                                  const struct bpf_insn *insn,
4566                                  const struct bpf_reg_state *dst_reg)
4567 {
4568         u32 dst = insn->dst_reg;
4569
4570         /* For unprivileged we require that resulting offset must be in bounds
4571          * in order to be able to sanitize access later on.
4572          */
4573         if (env->allow_ptr_leaks)
4574                 return 0;
4575
4576         switch (dst_reg->type) {
4577         case PTR_TO_STACK:
4578                 if (check_stack_access(env, dst_reg, dst_reg->off +
4579                                        dst_reg->var_off.value, 1)) {
4580                         verbose(env, "R%d stack pointer arithmetic goes out of range, "
4581                                 "prohibited for !root\n", dst);
4582                         return -EACCES;
4583                 }
4584                 break;
4585         case PTR_TO_MAP_VALUE:
4586                 if (check_map_access(env, dst, dst_reg->off, 1, false)) {
4587                         verbose(env, "R%d pointer arithmetic of map value goes out of range, "
4588                                 "prohibited for !root\n", dst);
4589                         return -EACCES;
4590                 }
4591                 break;
4592         default:
4593                 break;
4594         }
4595
4596         return 0;
4597 }
4598
4599 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
4600  * Caller should also handle BPF_MOV case separately.
4601  * If we return -EACCES, caller may want to try again treating pointer as a
4602  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
4603  */
4604 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
4605                                    struct bpf_insn *insn,
4606                                    const struct bpf_reg_state *ptr_reg,
4607                                    const struct bpf_reg_state *off_reg)
4608 {
4609         struct bpf_verifier_state *vstate = env->cur_state;
4610         struct bpf_func_state *state = vstate->frame[vstate->curframe];
4611         struct bpf_reg_state *regs = state->regs, *dst_reg;
4612         bool known = tnum_is_const(off_reg->var_off);
4613         s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
4614             smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
4615         u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
4616             umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
4617         struct bpf_sanitize_info info = {};
4618         u8 opcode = BPF_OP(insn->code);
4619         u32 dst = insn->dst_reg;
4620         int ret;
4621
4622         dst_reg = &regs[dst];
4623
4624         if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
4625             smin_val > smax_val || umin_val > umax_val) {
4626                 /* Taint dst register if offset had invalid bounds derived from
4627                  * e.g. dead branches.
4628                  */
4629                 __mark_reg_unknown(env, dst_reg);
4630                 return 0;
4631         }
4632
4633         if (BPF_CLASS(insn->code) != BPF_ALU64) {
4634                 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
4635                 verbose(env,
4636                         "R%d 32-bit pointer arithmetic prohibited\n",
4637                         dst);
4638                 return -EACCES;
4639         }
4640
4641         switch (ptr_reg->type) {
4642         case PTR_TO_MAP_VALUE_OR_NULL:
4643                 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
4644                         dst, reg_type_str[ptr_reg->type]);
4645                 return -EACCES;
4646         case CONST_PTR_TO_MAP:
4647                 /* smin_val represents the known value */
4648                 if (known && smin_val == 0 && opcode == BPF_ADD)
4649                         break;
4650                 /* fall-through */
4651         case PTR_TO_PACKET_END:
4652         case PTR_TO_SOCKET:
4653         case PTR_TO_SOCKET_OR_NULL:
4654         case PTR_TO_SOCK_COMMON:
4655         case PTR_TO_SOCK_COMMON_OR_NULL:
4656         case PTR_TO_TCP_SOCK:
4657         case PTR_TO_TCP_SOCK_OR_NULL:
4658         case PTR_TO_XDP_SOCK:
4659                 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
4660                         dst, reg_type_str[ptr_reg->type]);
4661                 return -EACCES;
4662         default:
4663                 break;
4664         }
4665
4666         /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
4667          * The id may be overwritten later if we create a new variable offset.
4668          */
4669         dst_reg->type = ptr_reg->type;
4670         dst_reg->id = ptr_reg->id;
4671
4672         if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
4673             !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
4674                 return -EINVAL;
4675
4676         if (sanitize_needed(opcode)) {
4677                 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
4678                                        &info, false);
4679                 if (ret < 0)
4680                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
4681         }
4682
4683         switch (opcode) {
4684         case BPF_ADD:
4685                 /* We can take a fixed offset as long as it doesn't overflow
4686                  * the s32 'off' field
4687                  */
4688                 if (known && (ptr_reg->off + smin_val ==
4689                               (s64)(s32)(ptr_reg->off + smin_val))) {
4690                         /* pointer += K.  Accumulate it into fixed offset */
4691                         dst_reg->smin_value = smin_ptr;
4692                         dst_reg->smax_value = smax_ptr;
4693                         dst_reg->umin_value = umin_ptr;
4694                         dst_reg->umax_value = umax_ptr;
4695                         dst_reg->var_off = ptr_reg->var_off;
4696                         dst_reg->off = ptr_reg->off + smin_val;
4697                         dst_reg->raw = ptr_reg->raw;
4698                         break;
4699                 }
4700                 /* A new variable offset is created.  Note that off_reg->off
4701                  * == 0, since it's a scalar.
4702                  * dst_reg gets the pointer type and since some positive
4703                  * integer value was added to the pointer, give it a new 'id'
4704                  * if it's a PTR_TO_PACKET.
4705                  * this creates a new 'base' pointer, off_reg (variable) gets
4706                  * added into the variable offset, and we copy the fixed offset
4707                  * from ptr_reg.
4708                  */
4709                 if (signed_add_overflows(smin_ptr, smin_val) ||
4710                     signed_add_overflows(smax_ptr, smax_val)) {
4711                         dst_reg->smin_value = S64_MIN;
4712                         dst_reg->smax_value = S64_MAX;
4713                 } else {
4714                         dst_reg->smin_value = smin_ptr + smin_val;
4715                         dst_reg->smax_value = smax_ptr + smax_val;
4716                 }
4717                 if (umin_ptr + umin_val < umin_ptr ||
4718                     umax_ptr + umax_val < umax_ptr) {
4719                         dst_reg->umin_value = 0;
4720                         dst_reg->umax_value = U64_MAX;
4721                 } else {
4722                         dst_reg->umin_value = umin_ptr + umin_val;
4723                         dst_reg->umax_value = umax_ptr + umax_val;
4724                 }
4725                 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
4726                 dst_reg->off = ptr_reg->off;
4727                 dst_reg->raw = ptr_reg->raw;
4728                 if (reg_is_pkt_pointer(ptr_reg)) {
4729                         dst_reg->id = ++env->id_gen;
4730                         /* something was added to pkt_ptr, set range to zero */
4731                         dst_reg->raw = 0;
4732                 }
4733                 break;
4734         case BPF_SUB:
4735                 if (dst_reg == off_reg) {
4736                         /* scalar -= pointer.  Creates an unknown scalar */
4737                         verbose(env, "R%d tried to subtract pointer from scalar\n",
4738                                 dst);
4739                         return -EACCES;
4740                 }
4741                 /* We don't allow subtraction from FP, because (according to
4742                  * test_verifier.c test "invalid fp arithmetic", JITs might not
4743                  * be able to deal with it.
4744                  */
4745                 if (ptr_reg->type == PTR_TO_STACK) {
4746                         verbose(env, "R%d subtraction from stack pointer prohibited\n",
4747                                 dst);
4748                         return -EACCES;
4749                 }
4750                 if (known && (ptr_reg->off - smin_val ==
4751                               (s64)(s32)(ptr_reg->off - smin_val))) {
4752                         /* pointer -= K.  Subtract it from fixed offset */
4753                         dst_reg->smin_value = smin_ptr;
4754                         dst_reg->smax_value = smax_ptr;
4755                         dst_reg->umin_value = umin_ptr;
4756                         dst_reg->umax_value = umax_ptr;
4757                         dst_reg->var_off = ptr_reg->var_off;
4758                         dst_reg->id = ptr_reg->id;
4759                         dst_reg->off = ptr_reg->off - smin_val;
4760                         dst_reg->raw = ptr_reg->raw;
4761                         break;
4762                 }
4763                 /* A new variable offset is created.  If the subtrahend is known
4764                  * nonnegative, then any reg->range we had before is still good.
4765                  */
4766                 if (signed_sub_overflows(smin_ptr, smax_val) ||
4767                     signed_sub_overflows(smax_ptr, smin_val)) {
4768                         /* Overflow possible, we know nothing */
4769                         dst_reg->smin_value = S64_MIN;
4770                         dst_reg->smax_value = S64_MAX;
4771                 } else {
4772                         dst_reg->smin_value = smin_ptr - smax_val;
4773                         dst_reg->smax_value = smax_ptr - smin_val;
4774                 }
4775                 if (umin_ptr < umax_val) {
4776                         /* Overflow possible, we know nothing */
4777                         dst_reg->umin_value = 0;
4778                         dst_reg->umax_value = U64_MAX;
4779                 } else {
4780                         /* Cannot overflow (as long as bounds are consistent) */
4781                         dst_reg->umin_value = umin_ptr - umax_val;
4782                         dst_reg->umax_value = umax_ptr - umin_val;
4783                 }
4784                 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
4785                 dst_reg->off = ptr_reg->off;
4786                 dst_reg->raw = ptr_reg->raw;
4787                 if (reg_is_pkt_pointer(ptr_reg)) {
4788                         dst_reg->id = ++env->id_gen;
4789                         /* something was added to pkt_ptr, set range to zero */
4790                         if (smin_val < 0)
4791                                 dst_reg->raw = 0;
4792                 }
4793                 break;
4794         case BPF_AND:
4795         case BPF_OR:
4796         case BPF_XOR:
4797                 /* bitwise ops on pointers are troublesome, prohibit. */
4798                 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
4799                         dst, bpf_alu_string[opcode >> 4]);
4800                 return -EACCES;
4801         default:
4802                 /* other operators (e.g. MUL,LSH) produce non-pointer results */
4803                 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
4804                         dst, bpf_alu_string[opcode >> 4]);
4805                 return -EACCES;
4806         }
4807
4808         if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
4809                 return -EINVAL;
4810
4811         __update_reg_bounds(dst_reg);
4812         __reg_deduce_bounds(dst_reg);
4813         __reg_bound_offset(dst_reg);
4814
4815         if (sanitize_check_bounds(env, insn, dst_reg) < 0)
4816                 return -EACCES;
4817         if (sanitize_needed(opcode)) {
4818                 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
4819                                        &info, true);
4820                 if (ret < 0)
4821                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
4822         }
4823
4824         return 0;
4825 }
4826
4827 /* WARNING: This function does calculations on 64-bit values, but the actual
4828  * execution may occur on 32-bit values. Therefore, things like bitshifts
4829  * need extra checks in the 32-bit case.
4830  */
4831 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
4832                                       struct bpf_insn *insn,
4833                                       struct bpf_reg_state *dst_reg,
4834                                       struct bpf_reg_state src_reg)
4835 {
4836         struct bpf_reg_state *regs = cur_regs(env);
4837         u8 opcode = BPF_OP(insn->code);
4838         bool src_known, dst_known;
4839         s64 smin_val, smax_val;
4840         u64 umin_val, umax_val;
4841         u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
4842         int ret;
4843
4844         if (insn_bitness == 32) {
4845                 /* Relevant for 32-bit RSH: Information can propagate towards
4846                  * LSB, so it isn't sufficient to only truncate the output to
4847                  * 32 bits.
4848                  */
4849                 coerce_reg_to_size(dst_reg, 4);
4850                 coerce_reg_to_size(&src_reg, 4);
4851         }
4852
4853         smin_val = src_reg.smin_value;
4854         smax_val = src_reg.smax_value;
4855         umin_val = src_reg.umin_value;
4856         umax_val = src_reg.umax_value;
4857         src_known = tnum_is_const(src_reg.var_off);
4858         dst_known = tnum_is_const(dst_reg->var_off);
4859
4860         if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
4861             smin_val > smax_val || umin_val > umax_val) {
4862                 /* Taint dst register if offset had invalid bounds derived from
4863                  * e.g. dead branches.
4864                  */
4865                 __mark_reg_unknown(env, dst_reg);
4866                 return 0;
4867         }
4868
4869         if (!src_known &&
4870             opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
4871                 __mark_reg_unknown(env, dst_reg);
4872                 return 0;
4873         }
4874
4875         if (sanitize_needed(opcode)) {
4876                 ret = sanitize_val_alu(env, insn);
4877                 if (ret < 0)
4878                         return sanitize_err(env, insn, ret, NULL, NULL);
4879         }
4880
4881         switch (opcode) {
4882         case BPF_ADD:
4883                 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
4884                     signed_add_overflows(dst_reg->smax_value, smax_val)) {
4885                         dst_reg->smin_value = S64_MIN;
4886                         dst_reg->smax_value = S64_MAX;
4887                 } else {
4888                         dst_reg->smin_value += smin_val;
4889                         dst_reg->smax_value += smax_val;
4890                 }
4891                 if (dst_reg->umin_value + umin_val < umin_val ||
4892                     dst_reg->umax_value + umax_val < umax_val) {
4893                         dst_reg->umin_value = 0;
4894                         dst_reg->umax_value = U64_MAX;
4895                 } else {
4896                         dst_reg->umin_value += umin_val;
4897                         dst_reg->umax_value += umax_val;
4898                 }
4899                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
4900                 break;
4901         case BPF_SUB:
4902                 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
4903                     signed_sub_overflows(dst_reg->smax_value, smin_val)) {
4904                         /* Overflow possible, we know nothing */
4905                         dst_reg->smin_value = S64_MIN;
4906                         dst_reg->smax_value = S64_MAX;
4907                 } else {
4908                         dst_reg->smin_value -= smax_val;
4909                         dst_reg->smax_value -= smin_val;
4910                 }
4911                 if (dst_reg->umin_value < umax_val) {
4912                         /* Overflow possible, we know nothing */
4913                         dst_reg->umin_value = 0;
4914                         dst_reg->umax_value = U64_MAX;
4915                 } else {
4916                         /* Cannot overflow (as long as bounds are consistent) */
4917                         dst_reg->umin_value -= umax_val;
4918                         dst_reg->umax_value -= umin_val;
4919                 }
4920                 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
4921                 break;
4922         case BPF_MUL:
4923                 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
4924                 if (smin_val < 0 || dst_reg->smin_value < 0) {
4925                         /* Ain't nobody got time to multiply that sign */
4926                         __mark_reg_unbounded(dst_reg);
4927                         __update_reg_bounds(dst_reg);
4928                         break;
4929                 }
4930                 /* Both values are positive, so we can work with unsigned and
4931                  * copy the result to signed (unless it exceeds S64_MAX).
4932                  */
4933                 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
4934                         /* Potential overflow, we know nothing */
4935                         __mark_reg_unbounded(dst_reg);
4936                         /* (except what we can learn from the var_off) */
4937                         __update_reg_bounds(dst_reg);
4938                         break;
4939                 }
4940                 dst_reg->umin_value *= umin_val;
4941                 dst_reg->umax_value *= umax_val;
4942                 if (dst_reg->umax_value > S64_MAX) {
4943                         /* Overflow possible, we know nothing */
4944                         dst_reg->smin_value = S64_MIN;
4945                         dst_reg->smax_value = S64_MAX;
4946                 } else {
4947                         dst_reg->smin_value = dst_reg->umin_value;
4948                         dst_reg->smax_value = dst_reg->umax_value;
4949                 }
4950                 break;
4951         case BPF_AND:
4952                 if (src_known && dst_known) {
4953                         __mark_reg_known(dst_reg, dst_reg->var_off.value &
4954                                                   src_reg.var_off.value);
4955                         break;
4956                 }
4957                 /* We get our minimum from the var_off, since that's inherently
4958                  * bitwise.  Our maximum is the minimum of the operands' maxima.
4959                  */
4960                 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
4961                 dst_reg->umin_value = dst_reg->var_off.value;
4962                 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
4963                 if (dst_reg->smin_value < 0 || smin_val < 0) {
4964                         /* Lose signed bounds when ANDing negative numbers,
4965                          * ain't nobody got time for that.
4966                          */
4967                         dst_reg->smin_value = S64_MIN;
4968                         dst_reg->smax_value = S64_MAX;
4969                 } else {
4970                         /* ANDing two positives gives a positive, so safe to
4971                          * cast result into s64.
4972                          */
4973                         dst_reg->smin_value = dst_reg->umin_value;
4974                         dst_reg->smax_value = dst_reg->umax_value;
4975                 }
4976                 /* We may learn something more from the var_off */
4977                 __update_reg_bounds(dst_reg);
4978                 break;
4979         case BPF_OR:
4980                 if (src_known && dst_known) {
4981                         __mark_reg_known(dst_reg, dst_reg->var_off.value |
4982                                                   src_reg.var_off.value);
4983                         break;
4984                 }
4985                 /* We get our maximum from the var_off, and our minimum is the
4986                  * maximum of the operands' minima
4987                  */
4988                 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
4989                 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
4990                 dst_reg->umax_value = dst_reg->var_off.value |
4991                                       dst_reg->var_off.mask;
4992                 if (dst_reg->smin_value < 0 || smin_val < 0) {
4993                         /* Lose signed bounds when ORing negative numbers,
4994                          * ain't nobody got time for that.
4995                          */
4996                         dst_reg->smin_value = S64_MIN;
4997                         dst_reg->smax_value = S64_MAX;
4998                 } else {
4999                         /* ORing two positives gives a positive, so safe to
5000                          * cast result into s64.
5001                          */
5002                         dst_reg->smin_value = dst_reg->umin_value;
5003                         dst_reg->smax_value = dst_reg->umax_value;
5004                 }
5005                 /* We may learn something more from the var_off */
5006                 __update_reg_bounds(dst_reg);
5007                 break;
5008         case BPF_LSH:
5009                 if (umax_val >= insn_bitness) {
5010                         /* Shifts greater than 31 or 63 are undefined.
5011                          * This includes shifts by a negative number.
5012                          */
5013                         mark_reg_unknown(env, regs, insn->dst_reg);
5014                         break;
5015                 }
5016                 /* We lose all sign bit information (except what we can pick
5017                  * up from var_off)
5018                  */
5019                 dst_reg->smin_value = S64_MIN;
5020                 dst_reg->smax_value = S64_MAX;
5021                 /* If we might shift our top bit out, then we know nothing */
5022                 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
5023                         dst_reg->umin_value = 0;
5024                         dst_reg->umax_value = U64_MAX;
5025                 } else {
5026                         dst_reg->umin_value <<= umin_val;
5027                         dst_reg->umax_value <<= umax_val;
5028                 }
5029                 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
5030                 /* We may learn something more from the var_off */
5031                 __update_reg_bounds(dst_reg);
5032                 break;
5033         case BPF_RSH:
5034                 if (umax_val >= insn_bitness) {
5035                         /* Shifts greater than 31 or 63 are undefined.
5036                          * This includes shifts by a negative number.
5037                          */
5038                         mark_reg_unknown(env, regs, insn->dst_reg);
5039                         break;
5040                 }
5041                 /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
5042                  * be negative, then either:
5043                  * 1) src_reg might be zero, so the sign bit of the result is
5044                  *    unknown, so we lose our signed bounds
5045                  * 2) it's known negative, thus the unsigned bounds capture the
5046                  *    signed bounds
5047                  * 3) the signed bounds cross zero, so they tell us nothing
5048                  *    about the result
5049                  * If the value in dst_reg is known nonnegative, then again the
5050                  * unsigned bounts capture the signed bounds.
5051                  * Thus, in all cases it suffices to blow away our signed bounds
5052                  * and rely on inferring new ones from the unsigned bounds and
5053                  * var_off of the result.
5054                  */
5055                 dst_reg->smin_value = S64_MIN;
5056                 dst_reg->smax_value = S64_MAX;
5057                 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
5058                 dst_reg->umin_value >>= umax_val;
5059                 dst_reg->umax_value >>= umin_val;
5060                 /* We may learn something more from the var_off */
5061                 __update_reg_bounds(dst_reg);
5062                 break;
5063         case BPF_ARSH:
5064                 if (umax_val >= insn_bitness) {
5065                         /* Shifts greater than 31 or 63 are undefined.
5066                          * This includes shifts by a negative number.
5067                          */
5068                         mark_reg_unknown(env, regs, insn->dst_reg);
5069                         break;
5070                 }
5071
5072                 /* Upon reaching here, src_known is true and
5073                  * umax_val is equal to umin_val.
5074                  */
5075                 if (insn_bitness == 32) {
5076                         dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val);
5077                         dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val);
5078                 } else {
5079                         dst_reg->smin_value >>= umin_val;
5080                         dst_reg->smax_value >>= umin_val;
5081                 }
5082
5083                 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val,
5084                                                 insn_bitness);
5085
5086                 /* blow away the dst_reg umin_value/umax_value and rely on
5087                  * dst_reg var_off to refine the result.
5088                  */
5089                 dst_reg->umin_value = 0;
5090                 dst_reg->umax_value = U64_MAX;
5091                 __update_reg_bounds(dst_reg);
5092                 break;
5093         default:
5094                 mark_reg_unknown(env, regs, insn->dst_reg);
5095                 break;
5096         }
5097
5098         if (BPF_CLASS(insn->code) != BPF_ALU64) {
5099                 /* 32-bit ALU ops are (32,32)->32 */
5100                 coerce_reg_to_size(dst_reg, 4);
5101         }
5102
5103         __update_reg_bounds(dst_reg);
5104         __reg_deduce_bounds(dst_reg);
5105         __reg_bound_offset(dst_reg);
5106         return 0;
5107 }
5108
5109 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
5110  * and var_off.
5111  */
5112 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
5113                                    struct bpf_insn *insn)
5114 {
5115         struct bpf_verifier_state *vstate = env->cur_state;
5116         struct bpf_func_state *state = vstate->frame[vstate->curframe];
5117         struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
5118         struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
5119         u8 opcode = BPF_OP(insn->code);
5120         int err;
5121
5122         dst_reg = &regs[insn->dst_reg];
5123         src_reg = NULL;
5124         if (dst_reg->type != SCALAR_VALUE)
5125                 ptr_reg = dst_reg;
5126         if (BPF_SRC(insn->code) == BPF_X) {
5127                 src_reg = &regs[insn->src_reg];
5128                 if (src_reg->type != SCALAR_VALUE) {
5129                         if (dst_reg->type != SCALAR_VALUE) {
5130                                 /* Combining two pointers by any ALU op yields
5131                                  * an arbitrary scalar. Disallow all math except
5132                                  * pointer subtraction
5133                                  */
5134                                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
5135                                         mark_reg_unknown(env, regs, insn->dst_reg);
5136                                         return 0;
5137                                 }
5138                                 verbose(env, "R%d pointer %s pointer prohibited\n",
5139                                         insn->dst_reg,
5140                                         bpf_alu_string[opcode >> 4]);
5141                                 return -EACCES;
5142                         } else {
5143                                 /* scalar += pointer
5144                                  * This is legal, but we have to reverse our
5145                                  * src/dest handling in computing the range
5146                                  */
5147                                 err = mark_chain_precision(env, insn->dst_reg);
5148                                 if (err)
5149                                         return err;
5150                                 return adjust_ptr_min_max_vals(env, insn,
5151                                                                src_reg, dst_reg);
5152                         }
5153                 } else if (ptr_reg) {
5154                         /* pointer += scalar */
5155                         err = mark_chain_precision(env, insn->src_reg);
5156                         if (err)
5157                                 return err;
5158                         return adjust_ptr_min_max_vals(env, insn,
5159                                                        dst_reg, src_reg);
5160                 } else if (dst_reg->precise) {
5161                         /* if dst_reg is precise, src_reg should be precise as well */
5162                         err = mark_chain_precision(env, insn->src_reg);
5163                         if (err)
5164                                 return err;
5165                 }
5166         } else {
5167                 /* Pretend the src is a reg with a known value, since we only
5168                  * need to be able to read from this state.
5169                  */
5170                 off_reg.type = SCALAR_VALUE;
5171                 __mark_reg_known(&off_reg, insn->imm);
5172                 src_reg = &off_reg;
5173                 if (ptr_reg) /* pointer += K */
5174                         return adjust_ptr_min_max_vals(env, insn,
5175                                                        ptr_reg, src_reg);
5176         }
5177
5178         /* Got here implies adding two SCALAR_VALUEs */
5179         if (WARN_ON_ONCE(ptr_reg)) {
5180                 print_verifier_state(env, state);
5181                 verbose(env, "verifier internal error: unexpected ptr_reg\n");
5182                 return -EINVAL;
5183         }
5184         if (WARN_ON(!src_reg)) {
5185                 print_verifier_state(env, state);
5186                 verbose(env, "verifier internal error: no src_reg\n");
5187                 return -EINVAL;
5188         }
5189         return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
5190 }
5191
5192 /* check validity of 32-bit and 64-bit arithmetic operations */
5193 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
5194 {
5195         struct bpf_reg_state *regs = cur_regs(env);
5196         u8 opcode = BPF_OP(insn->code);
5197         int err;
5198
5199         if (opcode == BPF_END || opcode == BPF_NEG) {
5200                 if (opcode == BPF_NEG) {
5201                         if (BPF_SRC(insn->code) != 0 ||
5202                             insn->src_reg != BPF_REG_0 ||
5203                             insn->off != 0 || insn->imm != 0) {
5204                                 verbose(env, "BPF_NEG uses reserved fields\n");
5205                                 return -EINVAL;
5206                         }
5207                 } else {
5208                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
5209                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
5210                             BPF_CLASS(insn->code) == BPF_ALU64) {
5211                                 verbose(env, "BPF_END uses reserved fields\n");
5212                                 return -EINVAL;
5213                         }
5214                 }
5215
5216                 /* check src operand */
5217                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
5218                 if (err)
5219                         return err;
5220
5221                 if (is_pointer_value(env, insn->dst_reg)) {
5222                         verbose(env, "R%d pointer arithmetic prohibited\n",
5223                                 insn->dst_reg);
5224                         return -EACCES;
5225                 }
5226
5227                 /* check dest operand */
5228                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
5229                 if (err)
5230                         return err;
5231
5232         } else if (opcode == BPF_MOV) {
5233
5234                 if (BPF_SRC(insn->code) == BPF_X) {
5235                         if (insn->imm != 0 || insn->off != 0) {
5236                                 verbose(env, "BPF_MOV uses reserved fields\n");
5237                                 return -EINVAL;
5238                         }
5239
5240                         /* check src operand */
5241                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
5242                         if (err)
5243                                 return err;
5244                 } else {
5245                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
5246                                 verbose(env, "BPF_MOV uses reserved fields\n");
5247                                 return -EINVAL;
5248                         }
5249                 }
5250
5251                 /* check dest operand, mark as required later */
5252                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
5253                 if (err)
5254                         return err;
5255
5256                 if (BPF_SRC(insn->code) == BPF_X) {
5257                         struct bpf_reg_state *src_reg = regs + insn->src_reg;
5258                         struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
5259
5260                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
5261                                 /* case: R1 = R2
5262                                  * copy register state to dest reg
5263                                  */
5264                                 *dst_reg = *src_reg;
5265                                 dst_reg->live |= REG_LIVE_WRITTEN;
5266                                 dst_reg->subreg_def = DEF_NOT_SUBREG;
5267                         } else {
5268                                 /* R1 = (u32) R2 */
5269                                 if (is_pointer_value(env, insn->src_reg)) {
5270                                         verbose(env,
5271                                                 "R%d partial copy of pointer\n",
5272                                                 insn->src_reg);
5273                                         return -EACCES;
5274                                 } else if (src_reg->type == SCALAR_VALUE) {
5275                                         *dst_reg = *src_reg;
5276                                         dst_reg->live |= REG_LIVE_WRITTEN;
5277                                         dst_reg->subreg_def = env->insn_idx + 1;
5278                                 } else {
5279                                         mark_reg_unknown(env, regs,
5280                                                          insn->dst_reg);
5281                                 }
5282                                 coerce_reg_to_size(dst_reg, 4);
5283                         }
5284                 } else {
5285                         /* case: R = imm
5286                          * remember the value we stored into this reg
5287                          */
5288                         /* clear any state __mark_reg_known doesn't set */
5289                         mark_reg_unknown(env, regs, insn->dst_reg);
5290                         regs[insn->dst_reg].type = SCALAR_VALUE;
5291                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
5292                                 __mark_reg_known(regs + insn->dst_reg,
5293                                                  insn->imm);
5294                         } else {
5295                                 __mark_reg_known(regs + insn->dst_reg,
5296                                                  (u32)insn->imm);
5297                         }
5298                 }
5299
5300         } else if (opcode > BPF_END) {
5301                 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
5302                 return -EINVAL;
5303
5304         } else {        /* all other ALU ops: and, sub, xor, add, ... */
5305
5306                 if (BPF_SRC(insn->code) == BPF_X) {
5307                         if (insn->imm != 0 || insn->off != 0) {
5308                                 verbose(env, "BPF_ALU uses reserved fields\n");
5309                                 return -EINVAL;
5310                         }
5311                         /* check src1 operand */
5312                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
5313                         if (err)
5314                                 return err;
5315                 } else {
5316                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
5317                                 verbose(env, "BPF_ALU uses reserved fields\n");
5318                                 return -EINVAL;
5319                         }
5320                 }
5321
5322                 /* check src2 operand */
5323                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
5324                 if (err)
5325                         return err;
5326
5327                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
5328                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
5329                         verbose(env, "div by zero\n");
5330                         return -EINVAL;
5331                 }
5332
5333                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
5334                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
5335                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
5336
5337                         if (insn->imm < 0 || insn->imm >= size) {
5338                                 verbose(env, "invalid shift %d\n", insn->imm);
5339                                 return -EINVAL;
5340                         }
5341                 }
5342
5343                 /* check dest operand */
5344                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
5345                 if (err)
5346                         return err;
5347
5348                 return adjust_reg_min_max_vals(env, insn);
5349         }
5350
5351         return 0;
5352 }
5353
5354 static void __find_good_pkt_pointers(struct bpf_func_state *state,
5355                                      struct bpf_reg_state *dst_reg,
5356                                      enum bpf_reg_type type, u16 new_range)
5357 {
5358         struct bpf_reg_state *reg;
5359         int i;
5360
5361         for (i = 0; i < MAX_BPF_REG; i++) {
5362                 reg = &state->regs[i];
5363                 if (reg->type == type && reg->id == dst_reg->id)
5364                         /* keep the maximum range already checked */
5365                         reg->range = max(reg->range, new_range);
5366         }
5367
5368         bpf_for_each_spilled_reg(i, state, reg) {
5369                 if (!reg)
5370                         continue;
5371                 if (reg->type == type && reg->id == dst_reg->id)
5372                         reg->range = max(reg->range, new_range);
5373         }
5374 }
5375
5376 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
5377                                    struct bpf_reg_state *dst_reg,
5378                                    enum bpf_reg_type type,
5379                                    bool range_right_open)
5380 {
5381         u16 new_range;
5382         int i;
5383
5384         if (dst_reg->off < 0 ||
5385             (dst_reg->off == 0 && range_right_open))
5386                 /* This doesn't give us any range */
5387                 return;
5388
5389         if (dst_reg->umax_value > MAX_PACKET_OFF ||
5390             dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
5391                 /* Risk of overflow.  For instance, ptr + (1<<63) may be less
5392                  * than pkt_end, but that's because it's also less than pkt.
5393                  */
5394                 return;
5395
5396         new_range = dst_reg->off;
5397         if (range_right_open)
5398                 new_range++;
5399
5400         /* Examples for register markings:
5401          *
5402          * pkt_data in dst register:
5403          *
5404          *   r2 = r3;
5405          *   r2 += 8;
5406          *   if (r2 > pkt_end) goto <handle exception>
5407          *   <access okay>
5408          *
5409          *   r2 = r3;
5410          *   r2 += 8;
5411          *   if (r2 < pkt_end) goto <access okay>
5412          *   <handle exception>
5413          *
5414          *   Where:
5415          *     r2 == dst_reg, pkt_end == src_reg
5416          *     r2=pkt(id=n,off=8,r=0)
5417          *     r3=pkt(id=n,off=0,r=0)
5418          *
5419          * pkt_data in src register:
5420          *
5421          *   r2 = r3;
5422          *   r2 += 8;
5423          *   if (pkt_end >= r2) goto <access okay>
5424          *   <handle exception>
5425          *
5426          *   r2 = r3;
5427          *   r2 += 8;
5428          *   if (pkt_end <= r2) goto <handle exception>
5429          *   <access okay>
5430          *
5431          *   Where:
5432          *     pkt_end == dst_reg, r2 == src_reg
5433          *     r2=pkt(id=n,off=8,r=0)
5434          *     r3=pkt(id=n,off=0,r=0)
5435          *
5436          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
5437          * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
5438          * and [r3, r3 + 8-1) respectively is safe to access depending on
5439          * the check.
5440          */
5441
5442         /* If our ids match, then we must have the same max_value.  And we
5443          * don't care about the other reg's fixed offset, since if it's too big
5444          * the range won't allow anything.
5445          * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
5446          */
5447         for (i = 0; i <= vstate->curframe; i++)
5448                 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
5449                                          new_range);
5450 }
5451
5452 /* compute branch direction of the expression "if (reg opcode val) goto target;"
5453  * and return:
5454  *  1 - branch will be taken and "goto target" will be executed
5455  *  0 - branch will not be taken and fall-through to next insn
5456  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
5457  */
5458 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
5459                            bool is_jmp32)
5460 {
5461         struct bpf_reg_state reg_lo;
5462         s64 sval;
5463
5464         if (__is_pointer_value(false, reg))
5465                 return -1;
5466
5467         if (is_jmp32) {
5468                 reg_lo = *reg;
5469                 reg = &reg_lo;
5470                 /* For JMP32, only low 32 bits are compared, coerce_reg_to_size
5471                  * could truncate high bits and update umin/umax according to
5472                  * information of low bits.
5473                  */
5474                 coerce_reg_to_size(reg, 4);
5475                 /* smin/smax need special handling. For example, after coerce,
5476                  * if smin_value is 0x00000000ffffffffLL, the value is -1 when
5477                  * used as operand to JMP32. It is a negative number from s32's
5478                  * point of view, while it is a positive number when seen as
5479                  * s64. The smin/smax are kept as s64, therefore, when used with
5480                  * JMP32, they need to be transformed into s32, then sign
5481                  * extended back to s64.
5482                  *
5483                  * Also, smin/smax were copied from umin/umax. If umin/umax has
5484                  * different sign bit, then min/max relationship doesn't
5485                  * maintain after casting into s32, for this case, set smin/smax
5486                  * to safest range.
5487                  */
5488                 if ((reg->umax_value ^ reg->umin_value) &
5489                     (1ULL << 31)) {
5490                         reg->smin_value = S32_MIN;
5491                         reg->smax_value = S32_MAX;
5492                 }
5493                 reg->smin_value = (s64)(s32)reg->smin_value;
5494                 reg->smax_value = (s64)(s32)reg->smax_value;
5495
5496                 val = (u32)val;
5497                 sval = (s64)(s32)val;
5498         } else {
5499                 sval = (s64)val;
5500         }
5501
5502         switch (opcode) {
5503         case BPF_JEQ:
5504                 if (tnum_is_const(reg->var_off))
5505                         return !!tnum_equals_const(reg->var_off, val);
5506                 break;
5507         case BPF_JNE:
5508                 if (tnum_is_const(reg->var_off))
5509                         return !tnum_equals_const(reg->var_off, val);
5510                 break;
5511         case BPF_JSET:
5512                 if ((~reg->var_off.mask & reg->var_off.value) & val)
5513                         return 1;
5514                 if (!((reg->var_off.mask | reg->var_off.value) & val))
5515                         return 0;
5516                 break;
5517         case BPF_JGT:
5518                 if (reg->umin_value > val)
5519                         return 1;
5520                 else if (reg->umax_value <= val)
5521                         return 0;
5522                 break;
5523         case BPF_JSGT:
5524                 if (reg->smin_value > sval)
5525                         return 1;
5526                 else if (reg->smax_value < sval)
5527                         return 0;
5528                 break;
5529         case BPF_JLT:
5530                 if (reg->umax_value < val)
5531                         return 1;
5532                 else if (reg->umin_value >= val)
5533                         return 0;
5534                 break;
5535         case BPF_JSLT:
5536                 if (reg->smax_value < sval)
5537                         return 1;
5538                 else if (reg->smin_value >= sval)
5539                         return 0;
5540                 break;
5541         case BPF_JGE:
5542                 if (reg->umin_value >= val)
5543                         return 1;
5544                 else if (reg->umax_value < val)
5545                         return 0;
5546                 break;
5547         case BPF_JSGE:
5548                 if (reg->smin_value >= sval)
5549                         return 1;
5550                 else if (reg->smax_value < sval)
5551                         return 0;
5552                 break;
5553         case BPF_JLE:
5554                 if (reg->umax_value <= val)
5555                         return 1;
5556                 else if (reg->umin_value > val)
5557                         return 0;
5558                 break;
5559         case BPF_JSLE:
5560                 if (reg->smax_value <= sval)
5561                         return 1;
5562                 else if (reg->smin_value > sval)
5563                         return 0;
5564                 break;
5565         }
5566
5567         return -1;
5568 }
5569
5570 /* Generate min value of the high 32-bit from TNUM info. */
5571 static u64 gen_hi_min(struct tnum var)
5572 {
5573         return var.value & ~0xffffffffULL;
5574 }
5575
5576 /* Generate max value of the high 32-bit from TNUM info. */
5577 static u64 gen_hi_max(struct tnum var)
5578 {
5579         return (var.value | var.mask) & ~0xffffffffULL;
5580 }
5581
5582 /* Return true if VAL is compared with a s64 sign extended from s32, and they
5583  * are with the same signedness.
5584  */
5585 static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
5586 {
5587         return ((s32)sval >= 0 &&
5588                 reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
5589                ((s32)sval < 0 &&
5590                 reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
5591 }
5592
5593 /* Constrain the possible values of @reg with unsigned upper bound @bound.
5594  * If @is_exclusive, @bound is an exclusive limit, otherwise it is inclusive.
5595  * If @is_jmp32, @bound is a 32-bit value that only constrains the low 32 bits
5596  * of @reg.
5597  */
5598 static void set_upper_bound(struct bpf_reg_state *reg, u64 bound, bool is_jmp32,
5599                             bool is_exclusive)
5600 {
5601         if (is_exclusive) {
5602                 /* There are no values for `reg` that make `reg<0` true. */
5603                 if (bound == 0)
5604                         return;
5605                 bound--;
5606         }
5607         if (is_jmp32) {
5608                 /* Constrain the register's value in the tnum representation.
5609                  * For 64-bit comparisons this happens later in
5610                  * __reg_bound_offset(), but for 32-bit comparisons, we can be
5611                  * more precise than what can be derived from the updated
5612                  * numeric bounds.
5613                  */
5614                 struct tnum t = tnum_range(0, bound);
5615
5616                 t.mask |= ~0xffffffffULL; /* upper half is unknown */
5617                 reg->var_off = tnum_intersect(reg->var_off, t);
5618
5619                 /* Compute the 64-bit bound from the 32-bit bound. */
5620                 bound += gen_hi_max(reg->var_off);
5621         }
5622         reg->umax_value = min(reg->umax_value, bound);
5623 }
5624
5625 /* Constrain the possible values of @reg with unsigned lower bound @bound.
5626  * If @is_exclusive, @bound is an exclusive limit, otherwise it is inclusive.
5627  * If @is_jmp32, @bound is a 32-bit value that only constrains the low 32 bits
5628  * of @reg.
5629  */
5630 static void set_lower_bound(struct bpf_reg_state *reg, u64 bound, bool is_jmp32,
5631                             bool is_exclusive)
5632 {
5633         if (is_exclusive) {
5634                 /* There are no values for `reg` that make `reg>MAX` true. */
5635                 if (bound == (is_jmp32 ? U32_MAX : U64_MAX))
5636                         return;
5637                 bound++;
5638         }
5639         if (is_jmp32) {
5640                 /* Constrain the register's value in the tnum representation.
5641                  * For 64-bit comparisons this happens later in
5642                  * __reg_bound_offset(), but for 32-bit comparisons, we can be
5643                  * more precise than what can be derived from the updated
5644                  * numeric bounds.
5645                  */
5646                 struct tnum t = tnum_range(bound, U32_MAX);
5647
5648                 t.mask |= ~0xffffffffULL; /* upper half is unknown */
5649                 reg->var_off = tnum_intersect(reg->var_off, t);
5650
5651                 /* Compute the 64-bit bound from the 32-bit bound. */
5652                 bound += gen_hi_min(reg->var_off);
5653         }
5654         reg->umin_value = max(reg->umin_value, bound);
5655 }
5656
5657 /* Adjusts the register min/max values in the case that the dst_reg is the
5658  * variable register that we are working on, and src_reg is a constant or we're
5659  * simply doing a BPF_K check.
5660  * In JEQ/JNE cases we also adjust the var_off values.
5661  */
5662 static void reg_set_min_max(struct bpf_reg_state *true_reg,
5663                             struct bpf_reg_state *false_reg, u64 val,
5664                             u8 opcode, bool is_jmp32)
5665 {
5666         s64 sval;
5667
5668         /* If the dst_reg is a pointer, we can't learn anything about its
5669          * variable offset from the compare (unless src_reg were a pointer into
5670          * the same object, but we don't bother with that.
5671          * Since false_reg and true_reg have the same type by construction, we
5672          * only need to check one of them for pointerness.
5673          */
5674         if (__is_pointer_value(false, false_reg))
5675                 return;
5676
5677         val = is_jmp32 ? (u32)val : val;
5678         sval = is_jmp32 ? (s64)(s32)val : (s64)val;
5679
5680         switch (opcode) {
5681         case BPF_JEQ:
5682         case BPF_JNE:
5683         {
5684                 struct bpf_reg_state *reg =
5685                         opcode == BPF_JEQ ? true_reg : false_reg;
5686
5687                 /* For BPF_JEQ, if this is false we know nothing Jon Snow, but
5688                  * if it is true we know the value for sure. Likewise for
5689                  * BPF_JNE.
5690                  */
5691                 if (is_jmp32) {
5692                         u64 old_v = reg->var_off.value;
5693                         u64 hi_mask = ~0xffffffffULL;
5694
5695                         reg->var_off.value = (old_v & hi_mask) | val;
5696                         reg->var_off.mask &= hi_mask;
5697                 } else {
5698                         __mark_reg_known(reg, val);
5699                 }
5700                 break;
5701         }
5702         case BPF_JSET:
5703                 false_reg->var_off = tnum_and(false_reg->var_off,
5704                                               tnum_const(~val));
5705                 if (is_power_of_2(val))
5706                         true_reg->var_off = tnum_or(true_reg->var_off,
5707                                                     tnum_const(val));
5708                 break;
5709         case BPF_JGE:
5710         case BPF_JGT:
5711         {
5712                 set_upper_bound(false_reg, val, is_jmp32, opcode == BPF_JGE);
5713                 set_lower_bound(true_reg, val, is_jmp32, opcode == BPF_JGT);
5714                 break;
5715         }
5716         case BPF_JSGE:
5717         case BPF_JSGT:
5718         {
5719                 s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
5720                 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
5721
5722                 /* If the full s64 was not sign-extended from s32 then don't
5723                  * deduct further info.
5724                  */
5725                 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
5726                         break;
5727                 false_reg->smax_value = min(false_reg->smax_value, false_smax);
5728                 true_reg->smin_value = max(true_reg->smin_value, true_smin);
5729                 break;
5730         }
5731         case BPF_JLE:
5732         case BPF_JLT:
5733         {
5734                 set_lower_bound(false_reg, val, is_jmp32, opcode == BPF_JLE);
5735                 set_upper_bound(true_reg, val, is_jmp32, opcode == BPF_JLT);
5736                 break;
5737         }
5738         case BPF_JSLE:
5739         case BPF_JSLT:
5740         {
5741                 s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
5742                 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
5743
5744                 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
5745                         break;
5746                 false_reg->smin_value = max(false_reg->smin_value, false_smin);
5747                 true_reg->smax_value = min(true_reg->smax_value, true_smax);
5748                 break;
5749         }
5750         default:
5751                 break;
5752         }
5753
5754         __reg_deduce_bounds(false_reg);
5755         __reg_deduce_bounds(true_reg);
5756         /* We might have learned some bits from the bounds. */
5757         __reg_bound_offset(false_reg);
5758         __reg_bound_offset(true_reg);
5759         /* Intersecting with the old var_off might have improved our bounds
5760          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
5761          * then new var_off is (0; 0x7f...fc) which improves our umax.
5762          */
5763         __update_reg_bounds(false_reg);
5764         __update_reg_bounds(true_reg);
5765 }
5766
5767 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
5768  * the variable reg.
5769  */
5770 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
5771                                 struct bpf_reg_state *false_reg, u64 val,
5772                                 u8 opcode, bool is_jmp32)
5773 {
5774         s64 sval;
5775
5776         if (__is_pointer_value(false, false_reg))
5777                 return;
5778
5779         val = is_jmp32 ? (u32)val : val;
5780         sval = is_jmp32 ? (s64)(s32)val : (s64)val;
5781
5782         switch (opcode) {
5783         case BPF_JEQ:
5784         case BPF_JNE:
5785         {
5786                 struct bpf_reg_state *reg =
5787                         opcode == BPF_JEQ ? true_reg : false_reg;
5788
5789                 if (is_jmp32) {
5790                         u64 old_v = reg->var_off.value;
5791                         u64 hi_mask = ~0xffffffffULL;
5792
5793                         reg->var_off.value = (old_v & hi_mask) | val;
5794                         reg->var_off.mask &= hi_mask;
5795                 } else {
5796                         __mark_reg_known(reg, val);
5797                 }
5798                 break;
5799         }
5800         case BPF_JSET:
5801                 false_reg->var_off = tnum_and(false_reg->var_off,
5802                                               tnum_const(~val));
5803                 if (is_power_of_2(val))
5804                         true_reg->var_off = tnum_or(true_reg->var_off,
5805                                                     tnum_const(val));
5806                 break;
5807         case BPF_JGE:
5808         case BPF_JGT:
5809         {
5810                 set_lower_bound(false_reg, val, is_jmp32, opcode == BPF_JGE);
5811                 set_upper_bound(true_reg, val, is_jmp32, opcode == BPF_JGT);
5812                 break;
5813         }
5814         case BPF_JSGE:
5815         case BPF_JSGT:
5816         {
5817                 s64 false_smin = opcode == BPF_JSGT ? sval    : sval + 1;
5818                 s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
5819
5820                 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
5821                         break;
5822                 false_reg->smin_value = max(false_reg->smin_value, false_smin);
5823                 true_reg->smax_value = min(true_reg->smax_value, true_smax);
5824                 break;
5825         }
5826         case BPF_JLE:
5827         case BPF_JLT:
5828         {
5829                 set_upper_bound(false_reg, val, is_jmp32, opcode == BPF_JLE);
5830                 set_lower_bound(true_reg, val, is_jmp32, opcode == BPF_JLT);
5831                 break;
5832         }
5833         case BPF_JSLE:
5834         case BPF_JSLT:
5835         {
5836                 s64 false_smax = opcode == BPF_JSLT ? sval    : sval - 1;
5837                 s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
5838
5839                 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
5840                         break;
5841                 false_reg->smax_value = min(false_reg->smax_value, false_smax);
5842                 true_reg->smin_value = max(true_reg->smin_value, true_smin);
5843                 break;
5844         }
5845         default:
5846                 break;
5847         }
5848
5849         __reg_deduce_bounds(false_reg);
5850         __reg_deduce_bounds(true_reg);
5851         /* We might have learned some bits from the bounds. */
5852         __reg_bound_offset(false_reg);
5853         __reg_bound_offset(true_reg);
5854         /* Intersecting with the old var_off might have improved our bounds
5855          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
5856          * then new var_off is (0; 0x7f...fc) which improves our umax.
5857          */
5858         __update_reg_bounds(false_reg);
5859         __update_reg_bounds(true_reg);
5860 }
5861
5862 /* Regs are known to be equal, so intersect their min/max/var_off */
5863 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
5864                                   struct bpf_reg_state *dst_reg)
5865 {
5866         src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
5867                                                         dst_reg->umin_value);
5868         src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
5869                                                         dst_reg->umax_value);
5870         src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
5871                                                         dst_reg->smin_value);
5872         src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
5873                                                         dst_reg->smax_value);
5874         src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
5875                                                              dst_reg->var_off);
5876         /* We might have learned new bounds from the var_off. */
5877         __update_reg_bounds(src_reg);
5878         __update_reg_bounds(dst_reg);
5879         /* We might have learned something about the sign bit. */
5880         __reg_deduce_bounds(src_reg);
5881         __reg_deduce_bounds(dst_reg);
5882         /* We might have learned some bits from the bounds. */
5883         __reg_bound_offset(src_reg);
5884         __reg_bound_offset(dst_reg);
5885         /* Intersecting with the old var_off might have improved our bounds
5886          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
5887          * then new var_off is (0; 0x7f...fc) which improves our umax.
5888          */
5889         __update_reg_bounds(src_reg);
5890         __update_reg_bounds(dst_reg);
5891 }
5892
5893 static void reg_combine_min_max(struct bpf_reg_state *true_src,
5894                                 struct bpf_reg_state *true_dst,
5895                                 struct bpf_reg_state *false_src,
5896                                 struct bpf_reg_state *false_dst,
5897                                 u8 opcode)
5898 {
5899         switch (opcode) {
5900         case BPF_JEQ:
5901                 __reg_combine_min_max(true_src, true_dst);
5902                 break;
5903         case BPF_JNE:
5904                 __reg_combine_min_max(false_src, false_dst);
5905                 break;
5906         }
5907 }
5908
5909 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
5910                                  struct bpf_reg_state *reg, u32 id,
5911                                  bool is_null)
5912 {
5913         if (reg_type_may_be_null(reg->type) && reg->id == id) {
5914                 /* Old offset (both fixed and variable parts) should
5915                  * have been known-zero, because we don't allow pointer
5916                  * arithmetic on pointers that might be NULL.
5917                  */
5918                 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
5919                                  !tnum_equals_const(reg->var_off, 0) ||
5920                                  reg->off)) {
5921                         __mark_reg_known_zero(reg);
5922                         reg->off = 0;
5923                 }
5924                 if (is_null) {
5925                         reg->type = SCALAR_VALUE;
5926                 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
5927                         if (reg->map_ptr->inner_map_meta) {
5928                                 reg->type = CONST_PTR_TO_MAP;
5929                                 reg->map_ptr = reg->map_ptr->inner_map_meta;
5930                         } else if (reg->map_ptr->map_type ==
5931                                    BPF_MAP_TYPE_XSKMAP) {
5932                                 reg->type = PTR_TO_XDP_SOCK;
5933                         } else {
5934                                 reg->type = PTR_TO_MAP_VALUE;
5935                         }
5936                 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
5937                         reg->type = PTR_TO_SOCKET;
5938                 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
5939                         reg->type = PTR_TO_SOCK_COMMON;
5940                 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
5941                         reg->type = PTR_TO_TCP_SOCK;
5942                 }
5943                 if (is_null) {
5944                         /* We don't need id and ref_obj_id from this point
5945                          * onwards anymore, thus we should better reset it,
5946                          * so that state pruning has chances to take effect.
5947                          */
5948                         reg->id = 0;
5949                         reg->ref_obj_id = 0;
5950                 } else if (!reg_may_point_to_spin_lock(reg)) {
5951                         /* For not-NULL ptr, reg->ref_obj_id will be reset
5952                          * in release_reg_references().
5953                          *
5954                          * reg->id is still used by spin_lock ptr. Other
5955                          * than spin_lock ptr type, reg->id can be reset.
5956                          */
5957                         reg->id = 0;
5958                 }
5959         }
5960 }
5961
5962 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
5963                                     bool is_null)
5964 {
5965         struct bpf_reg_state *reg;
5966         int i;
5967
5968         for (i = 0; i < MAX_BPF_REG; i++)
5969                 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
5970
5971         bpf_for_each_spilled_reg(i, state, reg) {
5972                 if (!reg)
5973                         continue;
5974                 mark_ptr_or_null_reg(state, reg, id, is_null);
5975         }
5976 }
5977
5978 /* The logic is similar to find_good_pkt_pointers(), both could eventually
5979  * be folded together at some point.
5980  */
5981 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
5982                                   bool is_null)
5983 {
5984         struct bpf_func_state *state = vstate->frame[vstate->curframe];
5985         struct bpf_reg_state *regs = state->regs;
5986         u32 ref_obj_id = regs[regno].ref_obj_id;
5987         u32 id = regs[regno].id;
5988         int i;
5989
5990         if (ref_obj_id && ref_obj_id == id && is_null)
5991                 /* regs[regno] is in the " == NULL" branch.
5992                  * No one could have freed the reference state before
5993                  * doing the NULL check.
5994                  */
5995                 WARN_ON_ONCE(release_reference_state(state, id));
5996
5997         for (i = 0; i <= vstate->curframe; i++)
5998                 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
5999 }
6000
6001 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
6002                                    struct bpf_reg_state *dst_reg,
6003                                    struct bpf_reg_state *src_reg,
6004                                    struct bpf_verifier_state *this_branch,
6005                                    struct bpf_verifier_state *other_branch)
6006 {
6007         if (BPF_SRC(insn->code) != BPF_X)
6008                 return false;
6009
6010         /* Pointers are always 64-bit. */
6011         if (BPF_CLASS(insn->code) == BPF_JMP32)
6012                 return false;
6013
6014         switch (BPF_OP(insn->code)) {
6015         case BPF_JGT:
6016                 if ((dst_reg->type == PTR_TO_PACKET &&
6017                      src_reg->type == PTR_TO_PACKET_END) ||
6018                     (dst_reg->type == PTR_TO_PACKET_META &&
6019                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6020                         /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
6021                         find_good_pkt_pointers(this_branch, dst_reg,
6022                                                dst_reg->type, false);
6023                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6024                             src_reg->type == PTR_TO_PACKET) ||
6025                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6026                             src_reg->type == PTR_TO_PACKET_META)) {
6027                         /* pkt_end > pkt_data', pkt_data > pkt_meta' */
6028                         find_good_pkt_pointers(other_branch, src_reg,
6029                                                src_reg->type, true);
6030                 } else {
6031                         return false;
6032                 }
6033                 break;
6034         case BPF_JLT:
6035                 if ((dst_reg->type == PTR_TO_PACKET &&
6036                      src_reg->type == PTR_TO_PACKET_END) ||
6037                     (dst_reg->type == PTR_TO_PACKET_META &&
6038                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6039                         /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
6040                         find_good_pkt_pointers(other_branch, dst_reg,
6041                                                dst_reg->type, true);
6042                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6043                             src_reg->type == PTR_TO_PACKET) ||
6044                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6045                             src_reg->type == PTR_TO_PACKET_META)) {
6046                         /* pkt_end < pkt_data', pkt_data > pkt_meta' */
6047                         find_good_pkt_pointers(this_branch, src_reg,
6048                                                src_reg->type, false);
6049                 } else {
6050                         return false;
6051                 }
6052                 break;
6053         case BPF_JGE:
6054                 if ((dst_reg->type == PTR_TO_PACKET &&
6055                      src_reg->type == PTR_TO_PACKET_END) ||
6056                     (dst_reg->type == PTR_TO_PACKET_META &&
6057                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6058                         /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
6059                         find_good_pkt_pointers(this_branch, dst_reg,
6060                                                dst_reg->type, true);
6061                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6062                             src_reg->type == PTR_TO_PACKET) ||
6063                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6064                             src_reg->type == PTR_TO_PACKET_META)) {
6065                         /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
6066                         find_good_pkt_pointers(other_branch, src_reg,
6067                                                src_reg->type, false);
6068                 } else {
6069                         return false;
6070                 }
6071                 break;
6072         case BPF_JLE:
6073                 if ((dst_reg->type == PTR_TO_PACKET &&
6074                      src_reg->type == PTR_TO_PACKET_END) ||
6075                     (dst_reg->type == PTR_TO_PACKET_META &&
6076                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6077                         /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
6078                         find_good_pkt_pointers(other_branch, dst_reg,
6079                                                dst_reg->type, false);
6080                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6081                             src_reg->type == PTR_TO_PACKET) ||
6082                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6083                             src_reg->type == PTR_TO_PACKET_META)) {
6084                         /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
6085                         find_good_pkt_pointers(this_branch, src_reg,
6086                                                src_reg->type, true);
6087                 } else {
6088                         return false;
6089                 }
6090                 break;
6091         default:
6092                 return false;
6093         }
6094
6095         return true;
6096 }
6097
6098 static int check_cond_jmp_op(struct bpf_verifier_env *env,
6099                              struct bpf_insn *insn, int *insn_idx)
6100 {
6101         struct bpf_verifier_state *this_branch = env->cur_state;
6102         struct bpf_verifier_state *other_branch;
6103         struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
6104         struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
6105         u8 opcode = BPF_OP(insn->code);
6106         bool is_jmp32;
6107         int pred = -1;
6108         int err;
6109
6110         /* Only conditional jumps are expected to reach here. */
6111         if (opcode == BPF_JA || opcode > BPF_JSLE) {
6112                 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
6113                 return -EINVAL;
6114         }
6115
6116         if (BPF_SRC(insn->code) == BPF_X) {
6117                 if (insn->imm != 0) {
6118                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
6119                         return -EINVAL;
6120                 }
6121
6122                 /* check src1 operand */
6123                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
6124                 if (err)
6125                         return err;
6126
6127                 if (is_pointer_value(env, insn->src_reg)) {
6128                         verbose(env, "R%d pointer comparison prohibited\n",
6129                                 insn->src_reg);
6130                         return -EACCES;
6131                 }
6132                 src_reg = &regs[insn->src_reg];
6133         } else {
6134                 if (insn->src_reg != BPF_REG_0) {
6135                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
6136                         return -EINVAL;
6137                 }
6138         }
6139
6140         /* check src2 operand */
6141         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6142         if (err)
6143                 return err;
6144
6145         dst_reg = &regs[insn->dst_reg];
6146         is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
6147
6148         if (BPF_SRC(insn->code) == BPF_K)
6149                 pred = is_branch_taken(dst_reg, insn->imm,
6150                                        opcode, is_jmp32);
6151         else if (src_reg->type == SCALAR_VALUE &&
6152                  tnum_is_const(src_reg->var_off))
6153                 pred = is_branch_taken(dst_reg, src_reg->var_off.value,
6154                                        opcode, is_jmp32);
6155         if (pred >= 0) {
6156                 err = mark_chain_precision(env, insn->dst_reg);
6157                 if (BPF_SRC(insn->code) == BPF_X && !err)
6158                         err = mark_chain_precision(env, insn->src_reg);
6159                 if (err)
6160                         return err;
6161         }
6162
6163         if (pred == 1) {
6164                 /* Only follow the goto, ignore fall-through. If needed, push
6165                  * the fall-through branch for simulation under speculative
6166                  * execution.
6167                  */
6168                 if (!env->allow_ptr_leaks &&
6169                     !sanitize_speculative_path(env, insn, *insn_idx + 1,
6170                                                *insn_idx))
6171                         return -EFAULT;
6172                 *insn_idx += insn->off;
6173                 return 0;
6174         } else if (pred == 0) {
6175                 /* Only follow the fall-through branch, since that's where the
6176                  * program will go. If needed, push the goto branch for
6177                  * simulation under speculative execution.
6178                  */
6179                 if (!env->allow_ptr_leaks &&
6180                     !sanitize_speculative_path(env, insn,
6181                                                *insn_idx + insn->off + 1,
6182                                                *insn_idx))
6183                         return -EFAULT;
6184                 return 0;
6185         }
6186
6187         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
6188                                   false);
6189         if (!other_branch)
6190                 return -EFAULT;
6191         other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
6192
6193         /* detect if we are comparing against a constant value so we can adjust
6194          * our min/max values for our dst register.
6195          * this is only legit if both are scalars (or pointers to the same
6196          * object, I suppose, but we don't support that right now), because
6197          * otherwise the different base pointers mean the offsets aren't
6198          * comparable.
6199          */
6200         if (BPF_SRC(insn->code) == BPF_X) {
6201                 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
6202                 struct bpf_reg_state lo_reg0 = *dst_reg;
6203                 struct bpf_reg_state lo_reg1 = *src_reg;
6204                 struct bpf_reg_state *src_lo, *dst_lo;
6205
6206                 dst_lo = &lo_reg0;
6207                 src_lo = &lo_reg1;
6208                 coerce_reg_to_size(dst_lo, 4);
6209                 coerce_reg_to_size(src_lo, 4);
6210
6211                 if (dst_reg->type == SCALAR_VALUE &&
6212                     src_reg->type == SCALAR_VALUE) {
6213                         if (tnum_is_const(src_reg->var_off) ||
6214                             (is_jmp32 && tnum_is_const(src_lo->var_off)))
6215                                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
6216                                                 dst_reg,
6217                                                 is_jmp32
6218                                                 ? src_lo->var_off.value
6219                                                 : src_reg->var_off.value,
6220                                                 opcode, is_jmp32);
6221                         else if (tnum_is_const(dst_reg->var_off) ||
6222                                  (is_jmp32 && tnum_is_const(dst_lo->var_off)))
6223                                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
6224                                                     src_reg,
6225                                                     is_jmp32
6226                                                     ? dst_lo->var_off.value
6227                                                     : dst_reg->var_off.value,
6228                                                     opcode, is_jmp32);
6229                         else if (!is_jmp32 &&
6230                                  (opcode == BPF_JEQ || opcode == BPF_JNE))
6231                                 /* Comparing for equality, we can combine knowledge */
6232                                 reg_combine_min_max(&other_branch_regs[insn->src_reg],
6233                                                     &other_branch_regs[insn->dst_reg],
6234                                                     src_reg, dst_reg, opcode);
6235                 }
6236         } else if (dst_reg->type == SCALAR_VALUE) {
6237                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
6238                                         dst_reg, insn->imm, opcode, is_jmp32);
6239         }
6240
6241         /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
6242          * NOTE: these optimizations below are related with pointer comparison
6243          *       which will never be JMP32.
6244          */
6245         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
6246             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
6247             reg_type_may_be_null(dst_reg->type)) {
6248                 /* Mark all identical registers in each branch as either
6249                  * safe or unknown depending R == 0 or R != 0 conditional.
6250                  */
6251                 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
6252                                       opcode == BPF_JNE);
6253                 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
6254                                       opcode == BPF_JEQ);
6255         } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
6256                                            this_branch, other_branch) &&
6257                    is_pointer_value(env, insn->dst_reg)) {
6258                 verbose(env, "R%d pointer comparison prohibited\n",
6259                         insn->dst_reg);
6260                 return -EACCES;
6261         }
6262         if (env->log.level & BPF_LOG_LEVEL)
6263                 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
6264         return 0;
6265 }
6266
6267 /* verify BPF_LD_IMM64 instruction */
6268 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
6269 {
6270         struct bpf_insn_aux_data *aux = cur_aux(env);
6271         struct bpf_reg_state *regs = cur_regs(env);
6272         struct bpf_map *map;
6273         int err;
6274
6275         if (BPF_SIZE(insn->code) != BPF_DW) {
6276                 verbose(env, "invalid BPF_LD_IMM insn\n");
6277                 return -EINVAL;
6278         }
6279         if (insn->off != 0) {
6280                 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
6281                 return -EINVAL;
6282         }
6283
6284         err = check_reg_arg(env, insn->dst_reg, DST_OP);
6285         if (err)
6286                 return err;
6287
6288         if (insn->src_reg == 0) {
6289                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
6290
6291                 regs[insn->dst_reg].type = SCALAR_VALUE;
6292                 __mark_reg_known(&regs[insn->dst_reg], imm);
6293                 return 0;
6294         }
6295
6296         map = env->used_maps[aux->map_index];
6297         mark_reg_known_zero(env, regs, insn->dst_reg);
6298         regs[insn->dst_reg].map_ptr = map;
6299
6300         if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
6301                 regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
6302                 regs[insn->dst_reg].off = aux->map_off;
6303                 if (map_value_has_spin_lock(map))
6304                         regs[insn->dst_reg].id = ++env->id_gen;
6305         } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
6306                 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
6307         } else {
6308                 verbose(env, "bpf verifier is misconfigured\n");
6309                 return -EINVAL;
6310         }
6311
6312         return 0;
6313 }
6314
6315 static bool may_access_skb(enum bpf_prog_type type)
6316 {
6317         switch (type) {
6318         case BPF_PROG_TYPE_SOCKET_FILTER:
6319         case BPF_PROG_TYPE_SCHED_CLS:
6320         case BPF_PROG_TYPE_SCHED_ACT:
6321                 return true;
6322         default:
6323                 return false;
6324         }
6325 }
6326
6327 /* verify safety of LD_ABS|LD_IND instructions:
6328  * - they can only appear in the programs where ctx == skb
6329  * - since they are wrappers of function calls, they scratch R1-R5 registers,
6330  *   preserve R6-R9, and store return value into R0
6331  *
6332  * Implicit input:
6333  *   ctx == skb == R6 == CTX
6334  *
6335  * Explicit input:
6336  *   SRC == any register
6337  *   IMM == 32-bit immediate
6338  *
6339  * Output:
6340  *   R0 - 8/16/32-bit skb data converted to cpu endianness
6341  */
6342 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
6343 {
6344         struct bpf_reg_state *regs = cur_regs(env);
6345         static const int ctx_reg = BPF_REG_6;
6346         u8 mode = BPF_MODE(insn->code);
6347         int i, err;
6348
6349         if (!may_access_skb(env->prog->type)) {
6350                 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
6351                 return -EINVAL;
6352         }
6353
6354         if (!env->ops->gen_ld_abs) {
6355                 verbose(env, "bpf verifier is misconfigured\n");
6356                 return -EINVAL;
6357         }
6358
6359         if (env->subprog_cnt > 1) {
6360                 /* when program has LD_ABS insn JITs and interpreter assume
6361                  * that r1 == ctx == skb which is not the case for callees
6362                  * that can have arbitrary arguments. It's problematic
6363                  * for main prog as well since JITs would need to analyze
6364                  * all functions in order to make proper register save/restore
6365                  * decisions in the main prog. Hence disallow LD_ABS with calls
6366                  */
6367                 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
6368                 return -EINVAL;
6369         }
6370
6371         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
6372             BPF_SIZE(insn->code) == BPF_DW ||
6373             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
6374                 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
6375                 return -EINVAL;
6376         }
6377
6378         /* check whether implicit source operand (register R6) is readable */
6379         err = check_reg_arg(env, ctx_reg, SRC_OP);
6380         if (err)
6381                 return err;
6382
6383         /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
6384          * gen_ld_abs() may terminate the program at runtime, leading to
6385          * reference leak.
6386          */
6387         err = check_reference_leak(env);
6388         if (err) {
6389                 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
6390                 return err;
6391         }
6392
6393         if (env->cur_state->active_spin_lock) {
6394                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
6395                 return -EINVAL;
6396         }
6397
6398         if (regs[ctx_reg].type != PTR_TO_CTX) {
6399                 verbose(env,
6400                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
6401                 return -EINVAL;
6402         }
6403
6404         if (mode == BPF_IND) {
6405                 /* check explicit source operand */
6406                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
6407                 if (err)
6408                         return err;
6409         }
6410
6411         err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
6412         if (err < 0)
6413                 return err;
6414
6415         /* reset caller saved regs to unreadable */
6416         for (i = 0; i < CALLER_SAVED_REGS; i++) {
6417                 mark_reg_not_init(env, regs, caller_saved[i]);
6418                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6419         }
6420
6421         /* mark destination R0 register as readable, since it contains
6422          * the value fetched from the packet.
6423          * Already marked as written above.
6424          */
6425         mark_reg_unknown(env, regs, BPF_REG_0);
6426         /* ld_abs load up to 32-bit skb data. */
6427         regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
6428         return 0;
6429 }
6430
6431 static int check_return_code(struct bpf_verifier_env *env)
6432 {
6433         struct tnum enforce_attach_type_range = tnum_unknown;
6434         struct bpf_reg_state *reg;
6435         struct tnum range = tnum_range(0, 1);
6436
6437         switch (env->prog->type) {
6438         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
6439                 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
6440                     env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
6441                         range = tnum_range(1, 1);
6442                 break;
6443         case BPF_PROG_TYPE_CGROUP_SKB:
6444                 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
6445                         range = tnum_range(0, 3);
6446                         enforce_attach_type_range = tnum_range(2, 3);
6447                 }
6448                 break;
6449         case BPF_PROG_TYPE_CGROUP_SOCK:
6450         case BPF_PROG_TYPE_SOCK_OPS:
6451         case BPF_PROG_TYPE_CGROUP_DEVICE:
6452         case BPF_PROG_TYPE_CGROUP_SYSCTL:
6453         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
6454                 break;
6455         default:
6456                 return 0;
6457         }
6458
6459         reg = cur_regs(env) + BPF_REG_0;
6460         if (reg->type != SCALAR_VALUE) {
6461                 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
6462                         reg_type_str[reg->type]);
6463                 return -EINVAL;
6464         }
6465
6466         if (!tnum_in(range, reg->var_off)) {
6467                 char tn_buf[48];
6468
6469                 verbose(env, "At program exit the register R0 ");
6470                 if (!tnum_is_unknown(reg->var_off)) {
6471                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6472                         verbose(env, "has value %s", tn_buf);
6473                 } else {
6474                         verbose(env, "has unknown scalar value");
6475                 }
6476                 tnum_strn(tn_buf, sizeof(tn_buf), range);
6477                 verbose(env, " should have been in %s\n", tn_buf);
6478                 return -EINVAL;
6479         }
6480
6481         if (!tnum_is_unknown(enforce_attach_type_range) &&
6482             tnum_in(enforce_attach_type_range, reg->var_off))
6483                 env->prog->enforce_expected_attach_type = 1;
6484         return 0;
6485 }
6486
6487 /* non-recursive DFS pseudo code
6488  * 1  procedure DFS-iterative(G,v):
6489  * 2      label v as discovered
6490  * 3      let S be a stack
6491  * 4      S.push(v)
6492  * 5      while S is not empty
6493  * 6            t <- S.pop()
6494  * 7            if t is what we're looking for:
6495  * 8                return t
6496  * 9            for all edges e in G.adjacentEdges(t) do
6497  * 10               if edge e is already labelled
6498  * 11                   continue with the next edge
6499  * 12               w <- G.adjacentVertex(t,e)
6500  * 13               if vertex w is not discovered and not explored
6501  * 14                   label e as tree-edge
6502  * 15                   label w as discovered
6503  * 16                   S.push(w)
6504  * 17                   continue at 5
6505  * 18               else if vertex w is discovered
6506  * 19                   label e as back-edge
6507  * 20               else
6508  * 21                   // vertex w is explored
6509  * 22                   label e as forward- or cross-edge
6510  * 23           label t as explored
6511  * 24           S.pop()
6512  *
6513  * convention:
6514  * 0x10 - discovered
6515  * 0x11 - discovered and fall-through edge labelled
6516  * 0x12 - discovered and fall-through and branch edges labelled
6517  * 0x20 - explored
6518  */
6519
6520 enum {
6521         DISCOVERED = 0x10,
6522         EXPLORED = 0x20,
6523         FALLTHROUGH = 1,
6524         BRANCH = 2,
6525 };
6526
6527 static u32 state_htab_size(struct bpf_verifier_env *env)
6528 {
6529         return env->prog->len;
6530 }
6531
6532 static struct bpf_verifier_state_list **explored_state(
6533                                         struct bpf_verifier_env *env,
6534                                         int idx)
6535 {
6536         struct bpf_verifier_state *cur = env->cur_state;
6537         struct bpf_func_state *state = cur->frame[cur->curframe];
6538
6539         return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
6540 }
6541
6542 static void init_explored_state(struct bpf_verifier_env *env, int idx)
6543 {
6544         env->insn_aux_data[idx].prune_point = true;
6545 }
6546
6547 /* t, w, e - match pseudo-code above:
6548  * t - index of current instruction
6549  * w - next instruction
6550  * e - edge
6551  */
6552 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
6553                      bool loop_ok)
6554 {
6555         int *insn_stack = env->cfg.insn_stack;
6556         int *insn_state = env->cfg.insn_state;
6557
6558         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
6559                 return 0;
6560
6561         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
6562                 return 0;
6563
6564         if (w < 0 || w >= env->prog->len) {
6565                 verbose_linfo(env, t, "%d: ", t);
6566                 verbose(env, "jump out of range from insn %d to %d\n", t, w);
6567                 return -EINVAL;
6568         }
6569
6570         if (e == BRANCH)
6571                 /* mark branch target for state pruning */
6572                 init_explored_state(env, w);
6573
6574         if (insn_state[w] == 0) {
6575                 /* tree-edge */
6576                 insn_state[t] = DISCOVERED | e;
6577                 insn_state[w] = DISCOVERED;
6578                 if (env->cfg.cur_stack >= env->prog->len)
6579                         return -E2BIG;
6580                 insn_stack[env->cfg.cur_stack++] = w;
6581                 return 1;
6582         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
6583                 if (loop_ok && env->allow_ptr_leaks)
6584                         return 0;
6585                 verbose_linfo(env, t, "%d: ", t);
6586                 verbose_linfo(env, w, "%d: ", w);
6587                 verbose(env, "back-edge from insn %d to %d\n", t, w);
6588                 return -EINVAL;
6589         } else if (insn_state[w] == EXPLORED) {
6590                 /* forward- or cross-edge */
6591                 insn_state[t] = DISCOVERED | e;
6592         } else {
6593                 verbose(env, "insn state internal bug\n");
6594                 return -EFAULT;
6595         }
6596         return 0;
6597 }
6598
6599 /* non-recursive depth-first-search to detect loops in BPF program
6600  * loop == back-edge in directed graph
6601  */
6602 static int check_cfg(struct bpf_verifier_env *env)
6603 {
6604         struct bpf_insn *insns = env->prog->insnsi;
6605         int insn_cnt = env->prog->len;
6606         int *insn_stack, *insn_state;
6607         int ret = 0;
6608         int i, t;
6609
6610         insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
6611         if (!insn_state)
6612                 return -ENOMEM;
6613
6614         insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
6615         if (!insn_stack) {
6616                 kvfree(insn_state);
6617                 return -ENOMEM;
6618         }
6619
6620         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
6621         insn_stack[0] = 0; /* 0 is the first instruction */
6622         env->cfg.cur_stack = 1;
6623
6624 peek_stack:
6625         if (env->cfg.cur_stack == 0)
6626                 goto check_state;
6627         t = insn_stack[env->cfg.cur_stack - 1];
6628
6629         if (BPF_CLASS(insns[t].code) == BPF_JMP ||
6630             BPF_CLASS(insns[t].code) == BPF_JMP32) {
6631                 u8 opcode = BPF_OP(insns[t].code);
6632
6633                 if (opcode == BPF_EXIT) {
6634                         goto mark_explored;
6635                 } else if (opcode == BPF_CALL) {
6636                         ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
6637                         if (ret == 1)
6638                                 goto peek_stack;
6639                         else if (ret < 0)
6640                                 goto err_free;
6641                         if (t + 1 < insn_cnt)
6642                                 init_explored_state(env, t + 1);
6643                         if (insns[t].src_reg == BPF_PSEUDO_CALL) {
6644                                 init_explored_state(env, t);
6645                                 ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
6646                                                 env, false);
6647                                 if (ret == 1)
6648                                         goto peek_stack;
6649                                 else if (ret < 0)
6650                                         goto err_free;
6651                         }
6652                 } else if (opcode == BPF_JA) {
6653                         if (BPF_SRC(insns[t].code) != BPF_K) {
6654                                 ret = -EINVAL;
6655                                 goto err_free;
6656                         }
6657                         /* unconditional jump with single edge */
6658                         ret = push_insn(t, t + insns[t].off + 1,
6659                                         FALLTHROUGH, env, true);
6660                         if (ret == 1)
6661                                 goto peek_stack;
6662                         else if (ret < 0)
6663                                 goto err_free;
6664                         /* unconditional jmp is not a good pruning point,
6665                          * but it's marked, since backtracking needs
6666                          * to record jmp history in is_state_visited().
6667                          */
6668                         init_explored_state(env, t + insns[t].off + 1);
6669                         /* tell verifier to check for equivalent states
6670                          * after every call and jump
6671                          */
6672                         if (t + 1 < insn_cnt)
6673                                 init_explored_state(env, t + 1);
6674                 } else {
6675                         /* conditional jump with two edges */
6676                         init_explored_state(env, t);
6677                         ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
6678                         if (ret == 1)
6679                                 goto peek_stack;
6680                         else if (ret < 0)
6681                                 goto err_free;
6682
6683                         ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
6684                         if (ret == 1)
6685                                 goto peek_stack;
6686                         else if (ret < 0)
6687                                 goto err_free;
6688                 }
6689         } else {
6690                 /* all other non-branch instructions with single
6691                  * fall-through edge
6692                  */
6693                 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
6694                 if (ret == 1)
6695                         goto peek_stack;
6696                 else if (ret < 0)
6697                         goto err_free;
6698         }
6699
6700 mark_explored:
6701         insn_state[t] = EXPLORED;
6702         if (env->cfg.cur_stack-- <= 0) {
6703                 verbose(env, "pop stack internal bug\n");
6704                 ret = -EFAULT;
6705                 goto err_free;
6706         }
6707         goto peek_stack;
6708
6709 check_state:
6710         for (i = 0; i < insn_cnt; i++) {
6711                 if (insn_state[i] != EXPLORED) {
6712                         verbose(env, "unreachable insn %d\n", i);
6713                         ret = -EINVAL;
6714                         goto err_free;
6715                 }
6716         }
6717         ret = 0; /* cfg looks good */
6718
6719 err_free:
6720         kvfree(insn_state);
6721         kvfree(insn_stack);
6722         env->cfg.insn_state = env->cfg.insn_stack = NULL;
6723         return ret;
6724 }
6725
6726 /* The minimum supported BTF func info size */
6727 #define MIN_BPF_FUNCINFO_SIZE   8
6728 #define MAX_FUNCINFO_REC_SIZE   252
6729
6730 static int check_btf_func(struct bpf_verifier_env *env,
6731                           const union bpf_attr *attr,
6732                           union bpf_attr __user *uattr)
6733 {
6734         u32 i, nfuncs, urec_size, min_size;
6735         u32 krec_size = sizeof(struct bpf_func_info);
6736         struct bpf_func_info *krecord;
6737         const struct btf_type *type;
6738         struct bpf_prog *prog;
6739         const struct btf *btf;
6740         void __user *urecord;
6741         u32 prev_offset = 0;
6742         int ret = 0;
6743
6744         nfuncs = attr->func_info_cnt;
6745         if (!nfuncs)
6746                 return 0;
6747
6748         if (nfuncs != env->subprog_cnt) {
6749                 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
6750                 return -EINVAL;
6751         }
6752
6753         urec_size = attr->func_info_rec_size;
6754         if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
6755             urec_size > MAX_FUNCINFO_REC_SIZE ||
6756             urec_size % sizeof(u32)) {
6757                 verbose(env, "invalid func info rec size %u\n", urec_size);
6758                 return -EINVAL;
6759         }
6760
6761         prog = env->prog;
6762         btf = prog->aux->btf;
6763
6764         urecord = u64_to_user_ptr(attr->func_info);
6765         min_size = min_t(u32, krec_size, urec_size);
6766
6767         krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
6768         if (!krecord)
6769                 return -ENOMEM;
6770
6771         for (i = 0; i < nfuncs; i++) {
6772                 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
6773                 if (ret) {
6774                         if (ret == -E2BIG) {
6775                                 verbose(env, "nonzero tailing record in func info");
6776                                 /* set the size kernel expects so loader can zero
6777                                  * out the rest of the record.
6778                                  */
6779                                 if (put_user(min_size, &uattr->func_info_rec_size))
6780                                         ret = -EFAULT;
6781                         }
6782                         goto err_free;
6783                 }
6784
6785                 if (copy_from_user(&krecord[i], urecord, min_size)) {
6786                         ret = -EFAULT;
6787                         goto err_free;
6788                 }
6789
6790                 /* check insn_off */
6791                 if (i == 0) {
6792                         if (krecord[i].insn_off) {
6793                                 verbose(env,
6794                                         "nonzero insn_off %u for the first func info record",
6795                                         krecord[i].insn_off);
6796                                 ret = -EINVAL;
6797                                 goto err_free;
6798                         }
6799                 } else if (krecord[i].insn_off <= prev_offset) {
6800                         verbose(env,
6801                                 "same or smaller insn offset (%u) than previous func info record (%u)",
6802                                 krecord[i].insn_off, prev_offset);
6803                         ret = -EINVAL;
6804                         goto err_free;
6805                 }
6806
6807                 if (env->subprog_info[i].start != krecord[i].insn_off) {
6808                         verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
6809                         ret = -EINVAL;
6810                         goto err_free;
6811                 }
6812
6813                 /* check type_id */
6814                 type = btf_type_by_id(btf, krecord[i].type_id);
6815                 if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
6816                         verbose(env, "invalid type id %d in func info",
6817                                 krecord[i].type_id);
6818                         ret = -EINVAL;
6819                         goto err_free;
6820                 }
6821
6822                 prev_offset = krecord[i].insn_off;
6823                 urecord += urec_size;
6824         }
6825
6826         prog->aux->func_info = krecord;
6827         prog->aux->func_info_cnt = nfuncs;
6828         return 0;
6829
6830 err_free:
6831         kvfree(krecord);
6832         return ret;
6833 }
6834
6835 static void adjust_btf_func(struct bpf_verifier_env *env)
6836 {
6837         int i;
6838
6839         if (!env->prog->aux->func_info)
6840                 return;
6841
6842         for (i = 0; i < env->subprog_cnt; i++)
6843                 env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
6844 }
6845
6846 #define MIN_BPF_LINEINFO_SIZE   (offsetof(struct bpf_line_info, line_col) + \
6847                 sizeof(((struct bpf_line_info *)(0))->line_col))
6848 #define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE
6849
6850 static int check_btf_line(struct bpf_verifier_env *env,
6851                           const union bpf_attr *attr,
6852                           union bpf_attr __user *uattr)
6853 {
6854         u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
6855         struct bpf_subprog_info *sub;
6856         struct bpf_line_info *linfo;
6857         struct bpf_prog *prog;
6858         const struct btf *btf;
6859         void __user *ulinfo;
6860         int err;
6861
6862         nr_linfo = attr->line_info_cnt;
6863         if (!nr_linfo)
6864                 return 0;
6865         if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
6866                 return -EINVAL;
6867
6868         rec_size = attr->line_info_rec_size;
6869         if (rec_size < MIN_BPF_LINEINFO_SIZE ||
6870             rec_size > MAX_LINEINFO_REC_SIZE ||
6871             rec_size & (sizeof(u32) - 1))
6872                 return -EINVAL;
6873
6874         /* Need to zero it in case the userspace may
6875          * pass in a smaller bpf_line_info object.
6876          */
6877         linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
6878                          GFP_KERNEL | __GFP_NOWARN);
6879         if (!linfo)
6880                 return -ENOMEM;
6881
6882         prog = env->prog;
6883         btf = prog->aux->btf;
6884
6885         s = 0;
6886         sub = env->subprog_info;
6887         ulinfo = u64_to_user_ptr(attr->line_info);
6888         expected_size = sizeof(struct bpf_line_info);
6889         ncopy = min_t(u32, expected_size, rec_size);
6890         for (i = 0; i < nr_linfo; i++) {
6891                 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
6892                 if (err) {
6893                         if (err == -E2BIG) {
6894                                 verbose(env, "nonzero tailing record in line_info");
6895                                 if (put_user(expected_size,
6896                                              &uattr->line_info_rec_size))
6897                                         err = -EFAULT;
6898                         }
6899                         goto err_free;
6900                 }
6901
6902                 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
6903                         err = -EFAULT;
6904                         goto err_free;
6905                 }
6906
6907                 /*
6908                  * Check insn_off to ensure
6909                  * 1) strictly increasing AND
6910                  * 2) bounded by prog->len
6911                  *
6912                  * The linfo[0].insn_off == 0 check logically falls into
6913                  * the later "missing bpf_line_info for func..." case
6914                  * because the first linfo[0].insn_off must be the
6915                  * first sub also and the first sub must have
6916                  * subprog_info[0].start == 0.
6917                  */
6918                 if ((i && linfo[i].insn_off <= prev_offset) ||
6919                     linfo[i].insn_off >= prog->len) {
6920                         verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
6921                                 i, linfo[i].insn_off, prev_offset,
6922                                 prog->len);
6923                         err = -EINVAL;
6924                         goto err_free;
6925                 }
6926
6927                 if (!prog->insnsi[linfo[i].insn_off].code) {
6928                         verbose(env,
6929                                 "Invalid insn code at line_info[%u].insn_off\n",
6930                                 i);
6931                         err = -EINVAL;
6932                         goto err_free;
6933                 }
6934
6935                 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
6936                     !btf_name_by_offset(btf, linfo[i].file_name_off)) {
6937                         verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
6938                         err = -EINVAL;
6939                         goto err_free;
6940                 }
6941
6942                 if (s != env->subprog_cnt) {
6943                         if (linfo[i].insn_off == sub[s].start) {
6944                                 sub[s].linfo_idx = i;
6945                                 s++;
6946                         } else if (sub[s].start < linfo[i].insn_off) {
6947                                 verbose(env, "missing bpf_line_info for func#%u\n", s);
6948                                 err = -EINVAL;
6949                                 goto err_free;
6950                         }
6951                 }
6952
6953                 prev_offset = linfo[i].insn_off;
6954                 ulinfo += rec_size;
6955         }
6956
6957         if (s != env->subprog_cnt) {
6958                 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
6959                         env->subprog_cnt - s, s);
6960                 err = -EINVAL;
6961                 goto err_free;
6962         }
6963
6964         prog->aux->linfo = linfo;
6965         prog->aux->nr_linfo = nr_linfo;
6966
6967         return 0;
6968
6969 err_free:
6970         kvfree(linfo);
6971         return err;
6972 }
6973
6974 static int check_btf_info(struct bpf_verifier_env *env,
6975                           const union bpf_attr *attr,
6976                           union bpf_attr __user *uattr)
6977 {
6978         struct btf *btf;
6979         int err;
6980
6981         if (!attr->func_info_cnt && !attr->line_info_cnt)
6982                 return 0;
6983
6984         btf = btf_get_by_fd(attr->prog_btf_fd);
6985         if (IS_ERR(btf))
6986                 return PTR_ERR(btf);
6987         env->prog->aux->btf = btf;
6988
6989         err = check_btf_func(env, attr, uattr);
6990         if (err)
6991                 return err;
6992
6993         err = check_btf_line(env, attr, uattr);
6994         if (err)
6995                 return err;
6996
6997         return 0;
6998 }
6999
7000 /* check %cur's range satisfies %old's */
7001 static bool range_within(struct bpf_reg_state *old,
7002                          struct bpf_reg_state *cur)
7003 {
7004         return old->umin_value <= cur->umin_value &&
7005                old->umax_value >= cur->umax_value &&
7006                old->smin_value <= cur->smin_value &&
7007                old->smax_value >= cur->smax_value;
7008 }
7009
7010 /* If in the old state two registers had the same id, then they need to have
7011  * the same id in the new state as well.  But that id could be different from
7012  * the old state, so we need to track the mapping from old to new ids.
7013  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
7014  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
7015  * regs with a different old id could still have new id 9, we don't care about
7016  * that.
7017  * So we look through our idmap to see if this old id has been seen before.  If
7018  * so, we require the new id to match; otherwise, we add the id pair to the map.
7019  */
7020 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
7021 {
7022         unsigned int i;
7023
7024         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
7025                 if (!idmap[i].old) {
7026                         /* Reached an empty slot; haven't seen this id before */
7027                         idmap[i].old = old_id;
7028                         idmap[i].cur = cur_id;
7029                         return true;
7030                 }
7031                 if (idmap[i].old == old_id)
7032                         return idmap[i].cur == cur_id;
7033         }
7034         /* We ran out of idmap slots, which should be impossible */
7035         WARN_ON_ONCE(1);
7036         return false;
7037 }
7038
7039 static void clean_func_state(struct bpf_verifier_env *env,
7040                              struct bpf_func_state *st)
7041 {
7042         enum bpf_reg_liveness live;
7043         int i, j;
7044
7045         for (i = 0; i < BPF_REG_FP; i++) {
7046                 live = st->regs[i].live;
7047                 /* liveness must not touch this register anymore */
7048                 st->regs[i].live |= REG_LIVE_DONE;
7049                 if (!(live & REG_LIVE_READ))
7050                         /* since the register is unused, clear its state
7051                          * to make further comparison simpler
7052                          */
7053                         __mark_reg_not_init(env, &st->regs[i]);
7054         }
7055
7056         for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
7057                 live = st->stack[i].spilled_ptr.live;
7058                 /* liveness must not touch this stack slot anymore */
7059                 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
7060                 if (!(live & REG_LIVE_READ)) {
7061                         __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
7062                         for (j = 0; j < BPF_REG_SIZE; j++)
7063                                 st->stack[i].slot_type[j] = STACK_INVALID;
7064                 }
7065         }
7066 }
7067
7068 static void clean_verifier_state(struct bpf_verifier_env *env,
7069                                  struct bpf_verifier_state *st)
7070 {
7071         int i;
7072
7073         if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
7074                 /* all regs in this state in all frames were already marked */
7075                 return;
7076
7077         for (i = 0; i <= st->curframe; i++)
7078                 clean_func_state(env, st->frame[i]);
7079 }
7080
7081 /* the parentage chains form a tree.
7082  * the verifier states are added to state lists at given insn and
7083  * pushed into state stack for future exploration.
7084  * when the verifier reaches bpf_exit insn some of the verifer states
7085  * stored in the state lists have their final liveness state already,
7086  * but a lot of states will get revised from liveness point of view when
7087  * the verifier explores other branches.
7088  * Example:
7089  * 1: r0 = 1
7090  * 2: if r1 == 100 goto pc+1
7091  * 3: r0 = 2
7092  * 4: exit
7093  * when the verifier reaches exit insn the register r0 in the state list of
7094  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
7095  * of insn 2 and goes exploring further. At the insn 4 it will walk the
7096  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
7097  *
7098  * Since the verifier pushes the branch states as it sees them while exploring
7099  * the program the condition of walking the branch instruction for the second
7100  * time means that all states below this branch were already explored and
7101  * their final liveness markes are already propagated.
7102  * Hence when the verifier completes the search of state list in is_state_visited()
7103  * we can call this clean_live_states() function to mark all liveness states
7104  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
7105  * will not be used.
7106  * This function also clears the registers and stack for states that !READ
7107  * to simplify state merging.
7108  *
7109  * Important note here that walking the same branch instruction in the callee
7110  * doesn't meant that the states are DONE. The verifier has to compare
7111  * the callsites
7112  */
7113 static void clean_live_states(struct bpf_verifier_env *env, int insn,
7114                               struct bpf_verifier_state *cur)
7115 {
7116         struct bpf_verifier_state_list *sl;
7117         int i;
7118
7119         sl = *explored_state(env, insn);
7120         while (sl) {
7121                 if (sl->state.branches)
7122                         goto next;
7123                 if (sl->state.insn_idx != insn ||
7124                     sl->state.curframe != cur->curframe)
7125                         goto next;
7126                 for (i = 0; i <= cur->curframe; i++)
7127                         if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
7128                                 goto next;
7129                 clean_verifier_state(env, &sl->state);
7130 next:
7131                 sl = sl->next;
7132         }
7133 }
7134
7135 /* Returns true if (rold safe implies rcur safe) */
7136 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
7137                     struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
7138 {
7139         bool equal;
7140
7141         if (!(rold->live & REG_LIVE_READ))
7142                 /* explored state didn't use this */
7143                 return true;
7144
7145         equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
7146
7147         if (rold->type == PTR_TO_STACK)
7148                 /* two stack pointers are equal only if they're pointing to
7149                  * the same stack frame, since fp-8 in foo != fp-8 in bar
7150                  */
7151                 return equal && rold->frameno == rcur->frameno;
7152
7153         if (equal)
7154                 return true;
7155
7156         if (rold->type == NOT_INIT)
7157                 /* explored state can't have used this */
7158                 return true;
7159         if (rcur->type == NOT_INIT)
7160                 return false;
7161         switch (rold->type) {
7162         case SCALAR_VALUE:
7163                 if (env->explore_alu_limits)
7164                         return false;
7165                 if (rcur->type == SCALAR_VALUE) {
7166                         if (!rold->precise && !rcur->precise)
7167                                 return true;
7168                         /* new val must satisfy old val knowledge */
7169                         return range_within(rold, rcur) &&
7170                                tnum_in(rold->var_off, rcur->var_off);
7171                 } else {
7172                         /* We're trying to use a pointer in place of a scalar.
7173                          * Even if the scalar was unbounded, this could lead to
7174                          * pointer leaks because scalars are allowed to leak
7175                          * while pointers are not. We could make this safe in
7176                          * special cases if root is calling us, but it's
7177                          * probably not worth the hassle.
7178                          */
7179                         return false;
7180                 }
7181         case PTR_TO_MAP_VALUE:
7182                 /* If the new min/max/var_off satisfy the old ones and
7183                  * everything else matches, we are OK.
7184                  * 'id' is not compared, since it's only used for maps with
7185                  * bpf_spin_lock inside map element and in such cases if
7186                  * the rest of the prog is valid for one map element then
7187                  * it's valid for all map elements regardless of the key
7188                  * used in bpf_map_lookup()
7189                  */
7190                 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
7191                        range_within(rold, rcur) &&
7192                        tnum_in(rold->var_off, rcur->var_off);
7193         case PTR_TO_MAP_VALUE_OR_NULL:
7194                 /* a PTR_TO_MAP_VALUE could be safe to use as a
7195                  * PTR_TO_MAP_VALUE_OR_NULL into the same map.
7196                  * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
7197                  * checked, doing so could have affected others with the same
7198                  * id, and we can't check for that because we lost the id when
7199                  * we converted to a PTR_TO_MAP_VALUE.
7200                  */
7201                 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
7202                         return false;
7203                 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
7204                         return false;
7205                 /* Check our ids match any regs they're supposed to */
7206                 return check_ids(rold->id, rcur->id, idmap);
7207         case PTR_TO_PACKET_META:
7208         case PTR_TO_PACKET:
7209                 if (rcur->type != rold->type)
7210                         return false;
7211                 /* We must have at least as much range as the old ptr
7212                  * did, so that any accesses which were safe before are
7213                  * still safe.  This is true even if old range < old off,
7214                  * since someone could have accessed through (ptr - k), or
7215                  * even done ptr -= k in a register, to get a safe access.
7216                  */
7217                 if (rold->range > rcur->range)
7218                         return false;
7219                 /* If the offsets don't match, we can't trust our alignment;
7220                  * nor can we be sure that we won't fall out of range.
7221                  */
7222                 if (rold->off != rcur->off)
7223                         return false;
7224                 /* id relations must be preserved */
7225                 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
7226                         return false;
7227                 /* new val must satisfy old val knowledge */
7228                 return range_within(rold, rcur) &&
7229                        tnum_in(rold->var_off, rcur->var_off);
7230         case PTR_TO_CTX:
7231         case CONST_PTR_TO_MAP:
7232         case PTR_TO_PACKET_END:
7233         case PTR_TO_FLOW_KEYS:
7234         case PTR_TO_SOCKET:
7235         case PTR_TO_SOCKET_OR_NULL:
7236         case PTR_TO_SOCK_COMMON:
7237         case PTR_TO_SOCK_COMMON_OR_NULL:
7238         case PTR_TO_TCP_SOCK:
7239         case PTR_TO_TCP_SOCK_OR_NULL:
7240         case PTR_TO_XDP_SOCK:
7241                 /* Only valid matches are exact, which memcmp() above
7242                  * would have accepted
7243                  */
7244         default:
7245                 /* Don't know what's going on, just say it's not safe */
7246                 return false;
7247         }
7248
7249         /* Shouldn't get here; if we do, say it's not safe */
7250         WARN_ON_ONCE(1);
7251         return false;
7252 }
7253
7254 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
7255                       struct bpf_func_state *cur, struct bpf_id_pair *idmap)
7256 {
7257         int i, spi;
7258
7259         /* walk slots of the explored stack and ignore any additional
7260          * slots in the current stack, since explored(safe) state
7261          * didn't use them
7262          */
7263         for (i = 0; i < old->allocated_stack; i++) {
7264                 spi = i / BPF_REG_SIZE;
7265
7266                 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
7267                         i += BPF_REG_SIZE - 1;
7268                         /* explored state didn't use this */
7269                         continue;
7270                 }
7271
7272                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
7273                         continue;
7274
7275                 /* explored stack has more populated slots than current stack
7276                  * and these slots were used
7277                  */
7278                 if (i >= cur->allocated_stack)
7279                         return false;
7280
7281                 /* if old state was safe with misc data in the stack
7282                  * it will be safe with zero-initialized stack.
7283                  * The opposite is not true
7284                  */
7285                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
7286                     cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
7287                         continue;
7288                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
7289                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
7290                         /* Ex: old explored (safe) state has STACK_SPILL in
7291                          * this stack slot, but current has has STACK_MISC ->
7292                          * this verifier states are not equivalent,
7293                          * return false to continue verification of this path
7294                          */
7295                         return false;
7296                 if (i % BPF_REG_SIZE)
7297                         continue;
7298                 if (old->stack[spi].slot_type[0] != STACK_SPILL)
7299                         continue;
7300                 if (!regsafe(env, &old->stack[spi].spilled_ptr,
7301                              &cur->stack[spi].spilled_ptr, idmap))
7302                         /* when explored and current stack slot are both storing
7303                          * spilled registers, check that stored pointers types
7304                          * are the same as well.
7305                          * Ex: explored safe path could have stored
7306                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
7307                          * but current path has stored:
7308                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
7309                          * such verifier states are not equivalent.
7310                          * return false to continue verification of this path
7311                          */
7312                         return false;
7313         }
7314         return true;
7315 }
7316
7317 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
7318 {
7319         if (old->acquired_refs != cur->acquired_refs)
7320                 return false;
7321         return !memcmp(old->refs, cur->refs,
7322                        sizeof(*old->refs) * old->acquired_refs);
7323 }
7324
7325 /* compare two verifier states
7326  *
7327  * all states stored in state_list are known to be valid, since
7328  * verifier reached 'bpf_exit' instruction through them
7329  *
7330  * this function is called when verifier exploring different branches of
7331  * execution popped from the state stack. If it sees an old state that has
7332  * more strict register state and more strict stack state then this execution
7333  * branch doesn't need to be explored further, since verifier already
7334  * concluded that more strict state leads to valid finish.
7335  *
7336  * Therefore two states are equivalent if register state is more conservative
7337  * and explored stack state is more conservative than the current one.
7338  * Example:
7339  *       explored                   current
7340  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
7341  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
7342  *
7343  * In other words if current stack state (one being explored) has more
7344  * valid slots than old one that already passed validation, it means
7345  * the verifier can stop exploring and conclude that current state is valid too
7346  *
7347  * Similarly with registers. If explored state has register type as invalid
7348  * whereas register type in current state is meaningful, it means that
7349  * the current state will reach 'bpf_exit' instruction safely
7350  */
7351 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
7352                               struct bpf_func_state *cur)
7353 {
7354         int i;
7355
7356         memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
7357         for (i = 0; i < MAX_BPF_REG; i++)
7358                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
7359                              env->idmap_scratch))
7360                         return false;
7361
7362         if (!stacksafe(env, old, cur, env->idmap_scratch))
7363                 return false;
7364
7365         if (!refsafe(old, cur))
7366                 return false;
7367
7368         return true;
7369 }
7370
7371 static bool states_equal(struct bpf_verifier_env *env,
7372                          struct bpf_verifier_state *old,
7373                          struct bpf_verifier_state *cur)
7374 {
7375         int i;
7376
7377         if (old->curframe != cur->curframe)
7378                 return false;
7379
7380         /* Verification state from speculative execution simulation
7381          * must never prune a non-speculative execution one.
7382          */
7383         if (old->speculative && !cur->speculative)
7384                 return false;
7385
7386         if (old->active_spin_lock != cur->active_spin_lock)
7387                 return false;
7388
7389         /* for states to be equal callsites have to be the same
7390          * and all frame states need to be equivalent
7391          */
7392         for (i = 0; i <= old->curframe; i++) {
7393                 if (old->frame[i]->callsite != cur->frame[i]->callsite)
7394                         return false;
7395                 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
7396                         return false;
7397         }
7398         return true;
7399 }
7400
7401 /* Return 0 if no propagation happened. Return negative error code if error
7402  * happened. Otherwise, return the propagated bit.
7403  */
7404 static int propagate_liveness_reg(struct bpf_verifier_env *env,
7405                                   struct bpf_reg_state *reg,
7406                                   struct bpf_reg_state *parent_reg)
7407 {
7408         u8 parent_flag = parent_reg->live & REG_LIVE_READ;
7409         u8 flag = reg->live & REG_LIVE_READ;
7410         int err;
7411
7412         /* When comes here, read flags of PARENT_REG or REG could be any of
7413          * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
7414          * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
7415          */
7416         if (parent_flag == REG_LIVE_READ64 ||
7417             /* Or if there is no read flag from REG. */
7418             !flag ||
7419             /* Or if the read flag from REG is the same as PARENT_REG. */
7420             parent_flag == flag)
7421                 return 0;
7422
7423         err = mark_reg_read(env, reg, parent_reg, flag);
7424         if (err)
7425                 return err;
7426
7427         return flag;
7428 }
7429
7430 /* A write screens off any subsequent reads; but write marks come from the
7431  * straight-line code between a state and its parent.  When we arrive at an
7432  * equivalent state (jump target or such) we didn't arrive by the straight-line
7433  * code, so read marks in the state must propagate to the parent regardless
7434  * of the state's write marks. That's what 'parent == state->parent' comparison
7435  * in mark_reg_read() is for.
7436  */
7437 static int propagate_liveness(struct bpf_verifier_env *env,
7438                               const struct bpf_verifier_state *vstate,
7439                               struct bpf_verifier_state *vparent)
7440 {
7441         struct bpf_reg_state *state_reg, *parent_reg;
7442         struct bpf_func_state *state, *parent;
7443         int i, frame, err = 0;
7444
7445         if (vparent->curframe != vstate->curframe) {
7446                 WARN(1, "propagate_live: parent frame %d current frame %d\n",
7447                      vparent->curframe, vstate->curframe);
7448                 return -EFAULT;
7449         }
7450         /* Propagate read liveness of registers... */
7451         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
7452         for (frame = 0; frame <= vstate->curframe; frame++) {
7453                 parent = vparent->frame[frame];
7454                 state = vstate->frame[frame];
7455                 parent_reg = parent->regs;
7456                 state_reg = state->regs;
7457                 /* We don't need to worry about FP liveness, it's read-only */
7458                 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
7459                         err = propagate_liveness_reg(env, &state_reg[i],
7460                                                      &parent_reg[i]);
7461                         if (err < 0)
7462                                 return err;
7463                         if (err == REG_LIVE_READ64)
7464                                 mark_insn_zext(env, &parent_reg[i]);
7465                 }
7466
7467                 /* Propagate stack slots. */
7468                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
7469                             i < parent->allocated_stack / BPF_REG_SIZE; i++) {
7470                         parent_reg = &parent->stack[i].spilled_ptr;
7471                         state_reg = &state->stack[i].spilled_ptr;
7472                         err = propagate_liveness_reg(env, state_reg,
7473                                                      parent_reg);
7474                         if (err < 0)
7475                                 return err;
7476                 }
7477         }
7478         return 0;
7479 }
7480
7481 /* find precise scalars in the previous equivalent state and
7482  * propagate them into the current state
7483  */
7484 static int propagate_precision(struct bpf_verifier_env *env,
7485                                const struct bpf_verifier_state *old)
7486 {
7487         struct bpf_reg_state *state_reg;
7488         struct bpf_func_state *state;
7489         int i, err = 0;
7490
7491         state = old->frame[old->curframe];
7492         state_reg = state->regs;
7493         for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
7494                 if (state_reg->type != SCALAR_VALUE ||
7495                     !state_reg->precise)
7496                         continue;
7497                 if (env->log.level & BPF_LOG_LEVEL2)
7498                         verbose(env, "propagating r%d\n", i);
7499                 err = mark_chain_precision(env, i);
7500                 if (err < 0)
7501                         return err;
7502         }
7503
7504         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
7505                 if (state->stack[i].slot_type[0] != STACK_SPILL)
7506                         continue;
7507                 state_reg = &state->stack[i].spilled_ptr;
7508                 if (state_reg->type != SCALAR_VALUE ||
7509                     !state_reg->precise)
7510                         continue;
7511                 if (env->log.level & BPF_LOG_LEVEL2)
7512                         verbose(env, "propagating fp%d\n",
7513                                 (-i - 1) * BPF_REG_SIZE);
7514                 err = mark_chain_precision_stack(env, i);
7515                 if (err < 0)
7516                         return err;
7517         }
7518         return 0;
7519 }
7520
7521 static bool states_maybe_looping(struct bpf_verifier_state *old,
7522                                  struct bpf_verifier_state *cur)
7523 {
7524         struct bpf_func_state *fold, *fcur;
7525         int i, fr = cur->curframe;
7526
7527         if (old->curframe != fr)
7528                 return false;
7529
7530         fold = old->frame[fr];
7531         fcur = cur->frame[fr];
7532         for (i = 0; i < MAX_BPF_REG; i++)
7533                 if (memcmp(&fold->regs[i], &fcur->regs[i],
7534                            offsetof(struct bpf_reg_state, parent)))
7535                         return false;
7536         return true;
7537 }
7538
7539
7540 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
7541 {
7542         struct bpf_verifier_state_list *new_sl;
7543         struct bpf_verifier_state_list *sl, **pprev;
7544         struct bpf_verifier_state *cur = env->cur_state, *new;
7545         int i, j, err, states_cnt = 0;
7546         bool add_new_state = env->test_state_freq ? true : false;
7547
7548         cur->last_insn_idx = env->prev_insn_idx;
7549         if (!env->insn_aux_data[insn_idx].prune_point)
7550                 /* this 'insn_idx' instruction wasn't marked, so we will not
7551                  * be doing state search here
7552                  */
7553                 return 0;
7554
7555         /* bpf progs typically have pruning point every 4 instructions
7556          * http://vger.kernel.org/bpfconf2019.html#session-1
7557          * Do not add new state for future pruning if the verifier hasn't seen
7558          * at least 2 jumps and at least 8 instructions.
7559          * This heuristics helps decrease 'total_states' and 'peak_states' metric.
7560          * In tests that amounts to up to 50% reduction into total verifier
7561          * memory consumption and 20% verifier time speedup.
7562          */
7563         if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
7564             env->insn_processed - env->prev_insn_processed >= 8)
7565                 add_new_state = true;
7566
7567         pprev = explored_state(env, insn_idx);
7568         sl = *pprev;
7569
7570         clean_live_states(env, insn_idx, cur);
7571
7572         while (sl) {
7573                 states_cnt++;
7574                 if (sl->state.insn_idx != insn_idx)
7575                         goto next;
7576                 if (sl->state.branches) {
7577                         if (states_maybe_looping(&sl->state, cur) &&
7578                             states_equal(env, &sl->state, cur)) {
7579                                 verbose_linfo(env, insn_idx, "; ");
7580                                 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
7581                                 return -EINVAL;
7582                         }
7583                         /* if the verifier is processing a loop, avoid adding new state
7584                          * too often, since different loop iterations have distinct
7585                          * states and may not help future pruning.
7586                          * This threshold shouldn't be too low to make sure that
7587                          * a loop with large bound will be rejected quickly.
7588                          * The most abusive loop will be:
7589                          * r1 += 1
7590                          * if r1 < 1000000 goto pc-2
7591                          * 1M insn_procssed limit / 100 == 10k peak states.
7592                          * This threshold shouldn't be too high either, since states
7593                          * at the end of the loop are likely to be useful in pruning.
7594                          */
7595                         if (env->jmps_processed - env->prev_jmps_processed < 20 &&
7596                             env->insn_processed - env->prev_insn_processed < 100)
7597                                 add_new_state = false;
7598                         goto miss;
7599                 }
7600                 if (states_equal(env, &sl->state, cur)) {
7601                         sl->hit_cnt++;
7602                         /* reached equivalent register/stack state,
7603                          * prune the search.
7604                          * Registers read by the continuation are read by us.
7605                          * If we have any write marks in env->cur_state, they
7606                          * will prevent corresponding reads in the continuation
7607                          * from reaching our parent (an explored_state).  Our
7608                          * own state will get the read marks recorded, but
7609                          * they'll be immediately forgotten as we're pruning
7610                          * this state and will pop a new one.
7611                          */
7612                         err = propagate_liveness(env, &sl->state, cur);
7613
7614                         /* if previous state reached the exit with precision and
7615                          * current state is equivalent to it (except precsion marks)
7616                          * the precision needs to be propagated back in
7617                          * the current state.
7618                          */
7619                         err = err ? : push_jmp_history(env, cur);
7620                         err = err ? : propagate_precision(env, &sl->state);
7621                         if (err)
7622                                 return err;
7623                         return 1;
7624                 }
7625 miss:
7626                 /* when new state is not going to be added do not increase miss count.
7627                  * Otherwise several loop iterations will remove the state
7628                  * recorded earlier. The goal of these heuristics is to have
7629                  * states from some iterations of the loop (some in the beginning
7630                  * and some at the end) to help pruning.
7631                  */
7632                 if (add_new_state)
7633                         sl->miss_cnt++;
7634                 /* heuristic to determine whether this state is beneficial
7635                  * to keep checking from state equivalence point of view.
7636                  * Higher numbers increase max_states_per_insn and verification time,
7637                  * but do not meaningfully decrease insn_processed.
7638                  */
7639                 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
7640                         /* the state is unlikely to be useful. Remove it to
7641                          * speed up verification
7642                          */
7643                         *pprev = sl->next;
7644                         if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
7645                                 u32 br = sl->state.branches;
7646
7647                                 WARN_ONCE(br,
7648                                           "BUG live_done but branches_to_explore %d\n",
7649                                           br);
7650                                 free_verifier_state(&sl->state, false);
7651                                 kfree(sl);
7652                                 env->peak_states--;
7653                         } else {
7654                                 /* cannot free this state, since parentage chain may
7655                                  * walk it later. Add it for free_list instead to
7656                                  * be freed at the end of verification
7657                                  */
7658                                 sl->next = env->free_list;
7659                                 env->free_list = sl;
7660                         }
7661                         sl = *pprev;
7662                         continue;
7663                 }
7664 next:
7665                 pprev = &sl->next;
7666                 sl = *pprev;
7667         }
7668
7669         if (env->max_states_per_insn < states_cnt)
7670                 env->max_states_per_insn = states_cnt;
7671
7672         if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
7673                 return push_jmp_history(env, cur);
7674
7675         if (!add_new_state)
7676                 return push_jmp_history(env, cur);
7677
7678         /* There were no equivalent states, remember the current one.
7679          * Technically the current state is not proven to be safe yet,
7680          * but it will either reach outer most bpf_exit (which means it's safe)
7681          * or it will be rejected. When there are no loops the verifier won't be
7682          * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
7683          * again on the way to bpf_exit.
7684          * When looping the sl->state.branches will be > 0 and this state
7685          * will not be considered for equivalence until branches == 0.
7686          */
7687         new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
7688         if (!new_sl)
7689                 return -ENOMEM;
7690         env->total_states++;
7691         env->peak_states++;
7692         env->prev_jmps_processed = env->jmps_processed;
7693         env->prev_insn_processed = env->insn_processed;
7694
7695         /* add new state to the head of linked list */
7696         new = &new_sl->state;
7697         err = copy_verifier_state(new, cur);
7698         if (err) {
7699                 free_verifier_state(new, false);
7700                 kfree(new_sl);
7701                 return err;
7702         }
7703         new->insn_idx = insn_idx;
7704         WARN_ONCE(new->branches != 1,
7705                   "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
7706
7707         cur->parent = new;
7708         cur->first_insn_idx = insn_idx;
7709         clear_jmp_history(cur);
7710         new_sl->next = *explored_state(env, insn_idx);
7711         *explored_state(env, insn_idx) = new_sl;
7712         /* connect new state to parentage chain. Current frame needs all
7713          * registers connected. Only r6 - r9 of the callers are alive (pushed
7714          * to the stack implicitly by JITs) so in callers' frames connect just
7715          * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
7716          * the state of the call instruction (with WRITTEN set), and r0 comes
7717          * from callee with its full parentage chain, anyway.
7718          */
7719         /* clear write marks in current state: the writes we did are not writes
7720          * our child did, so they don't screen off its reads from us.
7721          * (There are no read marks in current state, because reads always mark
7722          * their parent and current state never has children yet.  Only
7723          * explored_states can get read marks.)
7724          */
7725         for (j = 0; j <= cur->curframe; j++) {
7726                 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
7727                         cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
7728                 for (i = 0; i < BPF_REG_FP; i++)
7729                         cur->frame[j]->regs[i].live = REG_LIVE_NONE;
7730         }
7731
7732         /* all stack frames are accessible from callee, clear them all */
7733         for (j = 0; j <= cur->curframe; j++) {
7734                 struct bpf_func_state *frame = cur->frame[j];
7735                 struct bpf_func_state *newframe = new->frame[j];
7736
7737                 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
7738                         frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
7739                         frame->stack[i].spilled_ptr.parent =
7740                                                 &newframe->stack[i].spilled_ptr;
7741                 }
7742         }
7743         return 0;
7744 }
7745
7746 /* Return true if it's OK to have the same insn return a different type. */
7747 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
7748 {
7749         switch (type) {
7750         case PTR_TO_CTX:
7751         case PTR_TO_SOCKET:
7752         case PTR_TO_SOCKET_OR_NULL:
7753         case PTR_TO_SOCK_COMMON:
7754         case PTR_TO_SOCK_COMMON_OR_NULL:
7755         case PTR_TO_TCP_SOCK:
7756         case PTR_TO_TCP_SOCK_OR_NULL:
7757         case PTR_TO_XDP_SOCK:
7758                 return false;
7759         default:
7760                 return true;
7761         }
7762 }
7763
7764 /* If an instruction was previously used with particular pointer types, then we
7765  * need to be careful to avoid cases such as the below, where it may be ok
7766  * for one branch accessing the pointer, but not ok for the other branch:
7767  *
7768  * R1 = sock_ptr
7769  * goto X;
7770  * ...
7771  * R1 = some_other_valid_ptr;
7772  * goto X;
7773  * ...
7774  * R2 = *(u32 *)(R1 + 0);
7775  */
7776 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
7777 {
7778         return src != prev && (!reg_type_mismatch_ok(src) ||
7779                                !reg_type_mismatch_ok(prev));
7780 }
7781
7782 static int do_check(struct bpf_verifier_env *env)
7783 {
7784         struct bpf_verifier_state *state;
7785         struct bpf_insn *insns = env->prog->insnsi;
7786         struct bpf_reg_state *regs;
7787         int insn_cnt = env->prog->len;
7788         bool do_print_state = false;
7789         int prev_insn_idx = -1;
7790
7791         env->prev_linfo = NULL;
7792
7793         state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
7794         if (!state)
7795                 return -ENOMEM;
7796         state->curframe = 0;
7797         state->speculative = false;
7798         state->branches = 1;
7799         state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
7800         if (!state->frame[0]) {
7801                 kfree(state);
7802                 return -ENOMEM;
7803         }
7804         env->cur_state = state;
7805         init_func_state(env, state->frame[0],
7806                         BPF_MAIN_FUNC /* callsite */,
7807                         0 /* frameno */,
7808                         0 /* subprogno, zero == main subprog */);
7809
7810         for (;;) {
7811                 struct bpf_insn *insn;
7812                 u8 class;
7813                 int err;
7814
7815                 env->prev_insn_idx = prev_insn_idx;
7816                 if (env->insn_idx >= insn_cnt) {
7817                         verbose(env, "invalid insn idx %d insn_cnt %d\n",
7818                                 env->insn_idx, insn_cnt);
7819                         return -EFAULT;
7820                 }
7821
7822                 insn = &insns[env->insn_idx];
7823                 class = BPF_CLASS(insn->code);
7824
7825                 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
7826                         verbose(env,
7827                                 "BPF program is too large. Processed %d insn\n",
7828                                 env->insn_processed);
7829                         return -E2BIG;
7830                 }
7831
7832                 err = is_state_visited(env, env->insn_idx);
7833                 if (err < 0)
7834                         return err;
7835                 if (err == 1) {
7836                         /* found equivalent state, can prune the search */
7837                         if (env->log.level & BPF_LOG_LEVEL) {
7838                                 if (do_print_state)
7839                                         verbose(env, "\nfrom %d to %d%s: safe\n",
7840                                                 env->prev_insn_idx, env->insn_idx,
7841                                                 env->cur_state->speculative ?
7842                                                 " (speculative execution)" : "");
7843                                 else
7844                                         verbose(env, "%d: safe\n", env->insn_idx);
7845                         }
7846                         goto process_bpf_exit;
7847                 }
7848
7849                 if (signal_pending(current))
7850                         return -EAGAIN;
7851
7852                 if (need_resched())
7853                         cond_resched();
7854
7855                 if (env->log.level & BPF_LOG_LEVEL2 ||
7856                     (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
7857                         if (env->log.level & BPF_LOG_LEVEL2)
7858                                 verbose(env, "%d:", env->insn_idx);
7859                         else
7860                                 verbose(env, "\nfrom %d to %d%s:",
7861                                         env->prev_insn_idx, env->insn_idx,
7862                                         env->cur_state->speculative ?
7863                                         " (speculative execution)" : "");
7864                         print_verifier_state(env, state->frame[state->curframe]);
7865                         do_print_state = false;
7866                 }
7867
7868                 if (env->log.level & BPF_LOG_LEVEL) {
7869                         const struct bpf_insn_cbs cbs = {
7870                                 .cb_print       = verbose,
7871                                 .private_data   = env,
7872                         };
7873
7874                         verbose_linfo(env, env->insn_idx, "; ");
7875                         verbose(env, "%d: ", env->insn_idx);
7876                         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
7877                 }
7878
7879                 if (bpf_prog_is_dev_bound(env->prog->aux)) {
7880                         err = bpf_prog_offload_verify_insn(env, env->insn_idx,
7881                                                            env->prev_insn_idx);
7882                         if (err)
7883                                 return err;
7884                 }
7885
7886                 regs = cur_regs(env);
7887                 sanitize_mark_insn_seen(env);
7888                 prev_insn_idx = env->insn_idx;
7889
7890                 if (class == BPF_ALU || class == BPF_ALU64) {
7891                         err = check_alu_op(env, insn);
7892                         if (err)
7893                                 return err;
7894
7895                 } else if (class == BPF_LDX) {
7896                         enum bpf_reg_type *prev_src_type, src_reg_type;
7897
7898                         /* check for reserved fields is already done */
7899
7900                         /* check src operand */
7901                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
7902                         if (err)
7903                                 return err;
7904
7905                         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7906                         if (err)
7907                                 return err;
7908
7909                         src_reg_type = regs[insn->src_reg].type;
7910
7911                         /* check that memory (src_reg + off) is readable,
7912                          * the state of dst_reg will be updated by this func
7913                          */
7914                         err = check_mem_access(env, env->insn_idx, insn->src_reg,
7915                                                insn->off, BPF_SIZE(insn->code),
7916                                                BPF_READ, insn->dst_reg, false);
7917                         if (err)
7918                                 return err;
7919
7920                         prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
7921
7922                         if (*prev_src_type == NOT_INIT) {
7923                                 /* saw a valid insn
7924                                  * dst_reg = *(u32 *)(src_reg + off)
7925                                  * save type to validate intersecting paths
7926                                  */
7927                                 *prev_src_type = src_reg_type;
7928
7929                         } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
7930                                 /* ABuser program is trying to use the same insn
7931                                  * dst_reg = *(u32*) (src_reg + off)
7932                                  * with different pointer types:
7933                                  * src_reg == ctx in one branch and
7934                                  * src_reg == stack|map in some other branch.
7935                                  * Reject it.
7936                                  */
7937                                 verbose(env, "same insn cannot be used with different pointers\n");
7938                                 return -EINVAL;
7939                         }
7940
7941                 } else if (class == BPF_STX) {
7942                         enum bpf_reg_type *prev_dst_type, dst_reg_type;
7943
7944                         if (BPF_MODE(insn->code) == BPF_XADD) {
7945                                 err = check_xadd(env, env->insn_idx, insn);
7946                                 if (err)
7947                                         return err;
7948                                 env->insn_idx++;
7949                                 continue;
7950                         }
7951
7952                         /* check src1 operand */
7953                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
7954                         if (err)
7955                                 return err;
7956                         /* check src2 operand */
7957                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7958                         if (err)
7959                                 return err;
7960
7961                         dst_reg_type = regs[insn->dst_reg].type;
7962
7963                         /* check that memory (dst_reg + off) is writeable */
7964                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
7965                                                insn->off, BPF_SIZE(insn->code),
7966                                                BPF_WRITE, insn->src_reg, false);
7967                         if (err)
7968                                 return err;
7969
7970                         prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
7971
7972                         if (*prev_dst_type == NOT_INIT) {
7973                                 *prev_dst_type = dst_reg_type;
7974                         } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
7975                                 verbose(env, "same insn cannot be used with different pointers\n");
7976                                 return -EINVAL;
7977                         }
7978
7979                 } else if (class == BPF_ST) {
7980                         if (BPF_MODE(insn->code) != BPF_MEM ||
7981                             insn->src_reg != BPF_REG_0) {
7982                                 verbose(env, "BPF_ST uses reserved fields\n");
7983                                 return -EINVAL;
7984                         }
7985                         /* check src operand */
7986                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7987                         if (err)
7988                                 return err;
7989
7990                         if (is_ctx_reg(env, insn->dst_reg)) {
7991                                 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
7992                                         insn->dst_reg,
7993                                         reg_type_str[reg_state(env, insn->dst_reg)->type]);
7994                                 return -EACCES;
7995                         }
7996
7997                         /* check that memory (dst_reg + off) is writeable */
7998                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
7999                                                insn->off, BPF_SIZE(insn->code),
8000                                                BPF_WRITE, -1, false);
8001                         if (err)
8002                                 return err;
8003
8004                 } else if (class == BPF_JMP || class == BPF_JMP32) {
8005                         u8 opcode = BPF_OP(insn->code);
8006
8007                         env->jmps_processed++;
8008                         if (opcode == BPF_CALL) {
8009                                 if (BPF_SRC(insn->code) != BPF_K ||
8010                                     insn->off != 0 ||
8011                                     (insn->src_reg != BPF_REG_0 &&
8012                                      insn->src_reg != BPF_PSEUDO_CALL) ||
8013                                     insn->dst_reg != BPF_REG_0 ||
8014                                     class == BPF_JMP32) {
8015                                         verbose(env, "BPF_CALL uses reserved fields\n");
8016                                         return -EINVAL;
8017                                 }
8018
8019                                 if (env->cur_state->active_spin_lock &&
8020                                     (insn->src_reg == BPF_PSEUDO_CALL ||
8021                                      insn->imm != BPF_FUNC_spin_unlock)) {
8022                                         verbose(env, "function calls are not allowed while holding a lock\n");
8023                                         return -EINVAL;
8024                                 }
8025                                 if (insn->src_reg == BPF_PSEUDO_CALL)
8026                                         err = check_func_call(env, insn, &env->insn_idx);
8027                                 else
8028                                         err = check_helper_call(env, insn->imm, env->insn_idx);
8029                                 if (err)
8030                                         return err;
8031
8032                         } else if (opcode == BPF_JA) {
8033                                 if (BPF_SRC(insn->code) != BPF_K ||
8034                                     insn->imm != 0 ||
8035                                     insn->src_reg != BPF_REG_0 ||
8036                                     insn->dst_reg != BPF_REG_0 ||
8037                                     class == BPF_JMP32) {
8038                                         verbose(env, "BPF_JA uses reserved fields\n");
8039                                         return -EINVAL;
8040                                 }
8041
8042                                 env->insn_idx += insn->off + 1;
8043                                 continue;
8044
8045                         } else if (opcode == BPF_EXIT) {
8046                                 if (BPF_SRC(insn->code) != BPF_K ||
8047                                     insn->imm != 0 ||
8048                                     insn->src_reg != BPF_REG_0 ||
8049                                     insn->dst_reg != BPF_REG_0 ||
8050                                     class == BPF_JMP32) {
8051                                         verbose(env, "BPF_EXIT uses reserved fields\n");
8052                                         return -EINVAL;
8053                                 }
8054
8055                                 if (env->cur_state->active_spin_lock) {
8056                                         verbose(env, "bpf_spin_unlock is missing\n");
8057                                         return -EINVAL;
8058                                 }
8059
8060                                 if (state->curframe) {
8061                                         /* exit from nested function */
8062                                         err = prepare_func_exit(env, &env->insn_idx);
8063                                         if (err)
8064                                                 return err;
8065                                         do_print_state = true;
8066                                         continue;
8067                                 }
8068
8069                                 err = check_reference_leak(env);
8070                                 if (err)
8071                                         return err;
8072
8073                                 /* eBPF calling convetion is such that R0 is used
8074                                  * to return the value from eBPF program.
8075                                  * Make sure that it's readable at this time
8076                                  * of bpf_exit, which means that program wrote
8077                                  * something into it earlier
8078                                  */
8079                                 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
8080                                 if (err)
8081                                         return err;
8082
8083                                 if (is_pointer_value(env, BPF_REG_0)) {
8084                                         verbose(env, "R0 leaks addr as return value\n");
8085                                         return -EACCES;
8086                                 }
8087
8088                                 err = check_return_code(env);
8089                                 if (err)
8090                                         return err;
8091 process_bpf_exit:
8092                                 update_branch_counts(env, env->cur_state);
8093                                 err = pop_stack(env, &prev_insn_idx,
8094                                                 &env->insn_idx);
8095                                 if (err < 0) {
8096                                         if (err != -ENOENT)
8097                                                 return err;
8098                                         break;
8099                                 } else {
8100                                         do_print_state = true;
8101                                         continue;
8102                                 }
8103                         } else {
8104                                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
8105                                 if (err)
8106                                         return err;
8107                         }
8108                 } else if (class == BPF_LD) {
8109                         u8 mode = BPF_MODE(insn->code);
8110
8111                         if (mode == BPF_ABS || mode == BPF_IND) {
8112                                 err = check_ld_abs(env, insn);
8113                                 if (err)
8114                                         return err;
8115
8116                         } else if (mode == BPF_IMM) {
8117                                 err = check_ld_imm(env, insn);
8118                                 if (err)
8119                                         return err;
8120
8121                                 env->insn_idx++;
8122                                 sanitize_mark_insn_seen(env);
8123                         } else {
8124                                 verbose(env, "invalid BPF_LD mode\n");
8125                                 return -EINVAL;
8126                         }
8127                 } else {
8128                         verbose(env, "unknown insn class %d\n", class);
8129                         return -EINVAL;
8130                 }
8131
8132                 env->insn_idx++;
8133         }
8134
8135         env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
8136         return 0;
8137 }
8138
8139 static int check_map_prealloc(struct bpf_map *map)
8140 {
8141         return (map->map_type != BPF_MAP_TYPE_HASH &&
8142                 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
8143                 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
8144                 !(map->map_flags & BPF_F_NO_PREALLOC);
8145 }
8146
8147 static bool is_tracing_prog_type(enum bpf_prog_type type)
8148 {
8149         switch (type) {
8150         case BPF_PROG_TYPE_KPROBE:
8151         case BPF_PROG_TYPE_TRACEPOINT:
8152         case BPF_PROG_TYPE_PERF_EVENT:
8153         case BPF_PROG_TYPE_RAW_TRACEPOINT:
8154                 return true;
8155         default:
8156                 return false;
8157         }
8158 }
8159
8160 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
8161                                         struct bpf_map *map,
8162                                         struct bpf_prog *prog)
8163
8164 {
8165         /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
8166          * preallocated hash maps, since doing memory allocation
8167          * in overflow_handler can crash depending on where nmi got
8168          * triggered.
8169          */
8170         if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
8171                 if (!check_map_prealloc(map)) {
8172                         verbose(env, "perf_event programs can only use preallocated hash map\n");
8173                         return -EINVAL;
8174                 }
8175                 if (map->inner_map_meta &&
8176                     !check_map_prealloc(map->inner_map_meta)) {
8177                         verbose(env, "perf_event programs can only use preallocated inner hash map\n");
8178                         return -EINVAL;
8179                 }
8180         }
8181
8182         if ((is_tracing_prog_type(prog->type) ||
8183              prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
8184             map_value_has_spin_lock(map)) {
8185                 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
8186                 return -EINVAL;
8187         }
8188
8189         if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
8190             !bpf_offload_prog_map_match(prog, map)) {
8191                 verbose(env, "offload device mismatch between prog and map\n");
8192                 return -EINVAL;
8193         }
8194
8195         return 0;
8196 }
8197
8198 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
8199 {
8200         return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
8201                 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
8202 }
8203
8204 /* look for pseudo eBPF instructions that access map FDs and
8205  * replace them with actual map pointers
8206  */
8207 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
8208 {
8209         struct bpf_insn *insn = env->prog->insnsi;
8210         int insn_cnt = env->prog->len;
8211         int i, j, err;
8212
8213         err = bpf_prog_calc_tag(env->prog);
8214         if (err)
8215                 return err;
8216
8217         for (i = 0; i < insn_cnt; i++, insn++) {
8218                 if (BPF_CLASS(insn->code) == BPF_LDX &&
8219                     (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
8220                         verbose(env, "BPF_LDX uses reserved fields\n");
8221                         return -EINVAL;
8222                 }
8223
8224                 if (BPF_CLASS(insn->code) == BPF_STX &&
8225                     ((BPF_MODE(insn->code) != BPF_MEM &&
8226                       BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
8227                         verbose(env, "BPF_STX uses reserved fields\n");
8228                         return -EINVAL;
8229                 }
8230
8231                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
8232                         struct bpf_insn_aux_data *aux;
8233                         struct bpf_map *map;
8234                         struct fd f;
8235                         u64 addr;
8236
8237                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
8238                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
8239                             insn[1].off != 0) {
8240                                 verbose(env, "invalid bpf_ld_imm64 insn\n");
8241                                 return -EINVAL;
8242                         }
8243
8244                         if (insn[0].src_reg == 0)
8245                                 /* valid generic load 64-bit imm */
8246                                 goto next_insn;
8247
8248                         /* In final convert_pseudo_ld_imm64() step, this is
8249                          * converted into regular 64-bit imm load insn.
8250                          */
8251                         if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
8252                              insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
8253                             (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
8254                              insn[1].imm != 0)) {
8255                                 verbose(env,
8256                                         "unrecognized bpf_ld_imm64 insn\n");
8257                                 return -EINVAL;
8258                         }
8259
8260                         f = fdget(insn[0].imm);
8261                         map = __bpf_map_get(f);
8262                         if (IS_ERR(map)) {
8263                                 verbose(env, "fd %d is not pointing to valid bpf_map\n",
8264                                         insn[0].imm);
8265                                 return PTR_ERR(map);
8266                         }
8267
8268                         err = check_map_prog_compatibility(env, map, env->prog);
8269                         if (err) {
8270                                 fdput(f);
8271                                 return err;
8272                         }
8273
8274                         aux = &env->insn_aux_data[i];
8275                         if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8276                                 addr = (unsigned long)map;
8277                         } else {
8278                                 u32 off = insn[1].imm;
8279
8280                                 if (off >= BPF_MAX_VAR_OFF) {
8281                                         verbose(env, "direct value offset of %u is not allowed\n", off);
8282                                         fdput(f);
8283                                         return -EINVAL;
8284                                 }
8285
8286                                 if (!map->ops->map_direct_value_addr) {
8287                                         verbose(env, "no direct value access support for this map type\n");
8288                                         fdput(f);
8289                                         return -EINVAL;
8290                                 }
8291
8292                                 err = map->ops->map_direct_value_addr(map, &addr, off);
8293                                 if (err) {
8294                                         verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
8295                                                 map->value_size, off);
8296                                         fdput(f);
8297                                         return err;
8298                                 }
8299
8300                                 aux->map_off = off;
8301                                 addr += off;
8302                         }
8303
8304                         insn[0].imm = (u32)addr;
8305                         insn[1].imm = addr >> 32;
8306
8307                         /* check whether we recorded this map already */
8308                         for (j = 0; j < env->used_map_cnt; j++) {
8309                                 if (env->used_maps[j] == map) {
8310                                         aux->map_index = j;
8311                                         fdput(f);
8312                                         goto next_insn;
8313                                 }
8314                         }
8315
8316                         if (env->used_map_cnt >= MAX_USED_MAPS) {
8317                                 fdput(f);
8318                                 return -E2BIG;
8319                         }
8320
8321                         /* hold the map. If the program is rejected by verifier,
8322                          * the map will be released by release_maps() or it
8323                          * will be used by the valid program until it's unloaded
8324                          * and all maps are released in free_used_maps()
8325                          */
8326                         map = bpf_map_inc(map, false);
8327                         if (IS_ERR(map)) {
8328                                 fdput(f);
8329                                 return PTR_ERR(map);
8330                         }
8331
8332                         aux->map_index = env->used_map_cnt;
8333                         env->used_maps[env->used_map_cnt++] = map;
8334
8335                         if (bpf_map_is_cgroup_storage(map) &&
8336                             bpf_cgroup_storage_assign(env->prog, map)) {
8337                                 verbose(env, "only one cgroup storage of each type is allowed\n");
8338                                 fdput(f);
8339                                 return -EBUSY;
8340                         }
8341
8342                         fdput(f);
8343 next_insn:
8344                         insn++;
8345                         i++;
8346                         continue;
8347                 }
8348
8349                 /* Basic sanity check before we invest more work here. */
8350                 if (!bpf_opcode_in_insntable(insn->code)) {
8351                         verbose(env, "unknown opcode %02x\n", insn->code);
8352                         return -EINVAL;
8353                 }
8354         }
8355
8356         /* now all pseudo BPF_LD_IMM64 instructions load valid
8357          * 'struct bpf_map *' into a register instead of user map_fd.
8358          * These pointers will be used later by verifier to validate map access.
8359          */
8360         return 0;
8361 }
8362
8363 /* drop refcnt of maps used by the rejected program */
8364 static void release_maps(struct bpf_verifier_env *env)
8365 {
8366         enum bpf_cgroup_storage_type stype;
8367         int i;
8368
8369         for_each_cgroup_storage_type(stype) {
8370                 if (!env->prog->aux->cgroup_storage[stype])
8371                         continue;
8372                 bpf_cgroup_storage_release(env->prog,
8373                         env->prog->aux->cgroup_storage[stype]);
8374         }
8375
8376         for (i = 0; i < env->used_map_cnt; i++)
8377                 bpf_map_put(env->used_maps[i]);
8378 }
8379
8380 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
8381 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
8382 {
8383         struct bpf_insn *insn = env->prog->insnsi;
8384         int insn_cnt = env->prog->len;
8385         int i;
8386
8387         for (i = 0; i < insn_cnt; i++, insn++)
8388                 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
8389                         insn->src_reg = 0;
8390 }
8391
8392 /* single env->prog->insni[off] instruction was replaced with the range
8393  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
8394  * [0, off) and [off, end) to new locations, so the patched range stays zero
8395  */
8396 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
8397                                  struct bpf_insn_aux_data *new_data,
8398                                  struct bpf_prog *new_prog, u32 off, u32 cnt)
8399 {
8400         struct bpf_insn_aux_data *old_data = env->insn_aux_data;
8401         struct bpf_insn *insn = new_prog->insnsi;
8402         bool old_seen = old_data[off].seen;
8403         u32 prog_len;
8404         int i;
8405
8406         /* aux info at OFF always needs adjustment, no matter fast path
8407          * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
8408          * original insn at old prog.
8409          */
8410         old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
8411
8412         if (cnt == 1)
8413                 return;
8414         prog_len = new_prog->len;
8415
8416         memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
8417         memcpy(new_data + off + cnt - 1, old_data + off,
8418                sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
8419         for (i = off; i < off + cnt - 1; i++) {
8420                 /* Expand insni[off]'s seen count to the patched range. */
8421                 new_data[i].seen = old_seen;
8422                 new_data[i].zext_dst = insn_has_def32(env, insn + i);
8423         }
8424         env->insn_aux_data = new_data;
8425         vfree(old_data);
8426 }
8427
8428 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
8429 {
8430         int i;
8431
8432         if (len == 1)
8433                 return;
8434         /* NOTE: fake 'exit' subprog should be updated as well. */
8435         for (i = 0; i <= env->subprog_cnt; i++) {
8436                 if (env->subprog_info[i].start <= off)
8437                         continue;
8438                 env->subprog_info[i].start += len - 1;
8439         }
8440 }
8441
8442 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
8443                                             const struct bpf_insn *patch, u32 len)
8444 {
8445         struct bpf_prog *new_prog;
8446         struct bpf_insn_aux_data *new_data = NULL;
8447
8448         if (len > 1) {
8449                 new_data = vzalloc(array_size(env->prog->len + len - 1,
8450                                               sizeof(struct bpf_insn_aux_data)));
8451                 if (!new_data)
8452                         return NULL;
8453         }
8454
8455         new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
8456         if (IS_ERR(new_prog)) {
8457                 if (PTR_ERR(new_prog) == -ERANGE)
8458                         verbose(env,
8459                                 "insn %d cannot be patched due to 16-bit range\n",
8460                                 env->insn_aux_data[off].orig_idx);
8461                 vfree(new_data);
8462                 return NULL;
8463         }
8464         adjust_insn_aux_data(env, new_data, new_prog, off, len);
8465         adjust_subprog_starts(env, off, len);
8466         return new_prog;
8467 }
8468
8469 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
8470                                               u32 off, u32 cnt)
8471 {
8472         int i, j;
8473
8474         /* find first prog starting at or after off (first to remove) */
8475         for (i = 0; i < env->subprog_cnt; i++)
8476                 if (env->subprog_info[i].start >= off)
8477                         break;
8478         /* find first prog starting at or after off + cnt (first to stay) */
8479         for (j = i; j < env->subprog_cnt; j++)
8480                 if (env->subprog_info[j].start >= off + cnt)
8481                         break;
8482         /* if j doesn't start exactly at off + cnt, we are just removing
8483          * the front of previous prog
8484          */
8485         if (env->subprog_info[j].start != off + cnt)
8486                 j--;
8487
8488         if (j > i) {
8489                 struct bpf_prog_aux *aux = env->prog->aux;
8490                 int move;
8491
8492                 /* move fake 'exit' subprog as well */
8493                 move = env->subprog_cnt + 1 - j;
8494
8495                 memmove(env->subprog_info + i,
8496                         env->subprog_info + j,
8497                         sizeof(*env->subprog_info) * move);
8498                 env->subprog_cnt -= j - i;
8499
8500                 /* remove func_info */
8501                 if (aux->func_info) {
8502                         move = aux->func_info_cnt - j;
8503
8504                         memmove(aux->func_info + i,
8505                                 aux->func_info + j,
8506                                 sizeof(*aux->func_info) * move);
8507                         aux->func_info_cnt -= j - i;
8508                         /* func_info->insn_off is set after all code rewrites,
8509                          * in adjust_btf_func() - no need to adjust
8510                          */
8511                 }
8512         } else {
8513                 /* convert i from "first prog to remove" to "first to adjust" */
8514                 if (env->subprog_info[i].start == off)
8515                         i++;
8516         }
8517
8518         /* update fake 'exit' subprog as well */
8519         for (; i <= env->subprog_cnt; i++)
8520                 env->subprog_info[i].start -= cnt;
8521
8522         return 0;
8523 }
8524
8525 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
8526                                       u32 cnt)
8527 {
8528         struct bpf_prog *prog = env->prog;
8529         u32 i, l_off, l_cnt, nr_linfo;
8530         struct bpf_line_info *linfo;
8531
8532         nr_linfo = prog->aux->nr_linfo;
8533         if (!nr_linfo)
8534                 return 0;
8535
8536         linfo = prog->aux->linfo;
8537
8538         /* find first line info to remove, count lines to be removed */
8539         for (i = 0; i < nr_linfo; i++)
8540                 if (linfo[i].insn_off >= off)
8541                         break;
8542
8543         l_off = i;
8544         l_cnt = 0;
8545         for (; i < nr_linfo; i++)
8546                 if (linfo[i].insn_off < off + cnt)
8547                         l_cnt++;
8548                 else
8549                         break;
8550
8551         /* First live insn doesn't match first live linfo, it needs to "inherit"
8552          * last removed linfo.  prog is already modified, so prog->len == off
8553          * means no live instructions after (tail of the program was removed).
8554          */
8555         if (prog->len != off && l_cnt &&
8556             (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
8557                 l_cnt--;
8558                 linfo[--i].insn_off = off + cnt;
8559         }
8560
8561         /* remove the line info which refer to the removed instructions */
8562         if (l_cnt) {
8563                 memmove(linfo + l_off, linfo + i,
8564                         sizeof(*linfo) * (nr_linfo - i));
8565
8566                 prog->aux->nr_linfo -= l_cnt;
8567                 nr_linfo = prog->aux->nr_linfo;
8568         }
8569
8570         /* pull all linfo[i].insn_off >= off + cnt in by cnt */
8571         for (i = l_off; i < nr_linfo; i++)
8572                 linfo[i].insn_off -= cnt;
8573
8574         /* fix up all subprogs (incl. 'exit') which start >= off */
8575         for (i = 0; i <= env->subprog_cnt; i++)
8576                 if (env->subprog_info[i].linfo_idx > l_off) {
8577                         /* program may have started in the removed region but
8578                          * may not be fully removed
8579                          */
8580                         if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
8581                                 env->subprog_info[i].linfo_idx -= l_cnt;
8582                         else
8583                                 env->subprog_info[i].linfo_idx = l_off;
8584                 }
8585
8586         return 0;
8587 }
8588
8589 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
8590 {
8591         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8592         unsigned int orig_prog_len = env->prog->len;
8593         int err;
8594
8595         if (bpf_prog_is_dev_bound(env->prog->aux))
8596                 bpf_prog_offload_remove_insns(env, off, cnt);
8597
8598         err = bpf_remove_insns(env->prog, off, cnt);
8599         if (err)
8600                 return err;
8601
8602         err = adjust_subprog_starts_after_remove(env, off, cnt);
8603         if (err)
8604                 return err;
8605
8606         err = bpf_adj_linfo_after_remove(env, off, cnt);
8607         if (err)
8608                 return err;
8609
8610         memmove(aux_data + off, aux_data + off + cnt,
8611                 sizeof(*aux_data) * (orig_prog_len - off - cnt));
8612
8613         return 0;
8614 }
8615
8616 /* The verifier does more data flow analysis than llvm and will not
8617  * explore branches that are dead at run time. Malicious programs can
8618  * have dead code too. Therefore replace all dead at-run-time code
8619  * with 'ja -1'.
8620  *
8621  * Just nops are not optimal, e.g. if they would sit at the end of the
8622  * program and through another bug we would manage to jump there, then
8623  * we'd execute beyond program memory otherwise. Returning exception
8624  * code also wouldn't work since we can have subprogs where the dead
8625  * code could be located.
8626  */
8627 static void sanitize_dead_code(struct bpf_verifier_env *env)
8628 {
8629         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8630         struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
8631         struct bpf_insn *insn = env->prog->insnsi;
8632         const int insn_cnt = env->prog->len;
8633         int i;
8634
8635         for (i = 0; i < insn_cnt; i++) {
8636                 if (aux_data[i].seen)
8637                         continue;
8638                 memcpy(insn + i, &trap, sizeof(trap));
8639                 aux_data[i].zext_dst = false;
8640         }
8641 }
8642
8643 static bool insn_is_cond_jump(u8 code)
8644 {
8645         u8 op;
8646
8647         if (BPF_CLASS(code) == BPF_JMP32)
8648                 return true;
8649
8650         if (BPF_CLASS(code) != BPF_JMP)
8651                 return false;
8652
8653         op = BPF_OP(code);
8654         return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
8655 }
8656
8657 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
8658 {
8659         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8660         struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
8661         struct bpf_insn *insn = env->prog->insnsi;
8662         const int insn_cnt = env->prog->len;
8663         int i;
8664
8665         for (i = 0; i < insn_cnt; i++, insn++) {
8666                 if (!insn_is_cond_jump(insn->code))
8667                         continue;
8668
8669                 if (!aux_data[i + 1].seen)
8670                         ja.off = insn->off;
8671                 else if (!aux_data[i + 1 + insn->off].seen)
8672                         ja.off = 0;
8673                 else
8674                         continue;
8675
8676                 if (bpf_prog_is_dev_bound(env->prog->aux))
8677                         bpf_prog_offload_replace_insn(env, i, &ja);
8678
8679                 memcpy(insn, &ja, sizeof(ja));
8680         }
8681 }
8682
8683 static int opt_remove_dead_code(struct bpf_verifier_env *env)
8684 {
8685         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8686         int insn_cnt = env->prog->len;
8687         int i, err;
8688
8689         for (i = 0; i < insn_cnt; i++) {
8690                 int j;
8691
8692                 j = 0;
8693                 while (i + j < insn_cnt && !aux_data[i + j].seen)
8694                         j++;
8695                 if (!j)
8696                         continue;
8697
8698                 err = verifier_remove_insns(env, i, j);
8699                 if (err)
8700                         return err;
8701                 insn_cnt = env->prog->len;
8702         }
8703
8704         return 0;
8705 }
8706
8707 static int opt_remove_nops(struct bpf_verifier_env *env)
8708 {
8709         const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
8710         struct bpf_insn *insn = env->prog->insnsi;
8711         int insn_cnt = env->prog->len;
8712         int i, err;
8713
8714         for (i = 0; i < insn_cnt; i++) {
8715                 if (memcmp(&insn[i], &ja, sizeof(ja)))
8716                         continue;
8717
8718                 err = verifier_remove_insns(env, i, 1);
8719                 if (err)
8720                         return err;
8721                 insn_cnt--;
8722                 i--;
8723         }
8724
8725         return 0;
8726 }
8727
8728 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
8729                                          const union bpf_attr *attr)
8730 {
8731         struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
8732         struct bpf_insn_aux_data *aux = env->insn_aux_data;
8733         int i, patch_len, delta = 0, len = env->prog->len;
8734         struct bpf_insn *insns = env->prog->insnsi;
8735         struct bpf_prog *new_prog;
8736         bool rnd_hi32;
8737
8738         rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
8739         zext_patch[1] = BPF_ZEXT_REG(0);
8740         rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
8741         rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
8742         rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
8743         for (i = 0; i < len; i++) {
8744                 int adj_idx = i + delta;
8745                 struct bpf_insn insn;
8746
8747                 insn = insns[adj_idx];
8748                 if (!aux[adj_idx].zext_dst) {
8749                         u8 code, class;
8750                         u32 imm_rnd;
8751
8752                         if (!rnd_hi32)
8753                                 continue;
8754
8755                         code = insn.code;
8756                         class = BPF_CLASS(code);
8757                         if (insn_no_def(&insn))
8758                                 continue;
8759
8760                         /* NOTE: arg "reg" (the fourth one) is only used for
8761                          *       BPF_STX which has been ruled out in above
8762                          *       check, it is safe to pass NULL here.
8763                          */
8764                         if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
8765                                 if (class == BPF_LD &&
8766                                     BPF_MODE(code) == BPF_IMM)
8767                                         i++;
8768                                 continue;
8769                         }
8770
8771                         /* ctx load could be transformed into wider load. */
8772                         if (class == BPF_LDX &&
8773                             aux[adj_idx].ptr_type == PTR_TO_CTX)
8774                                 continue;
8775
8776                         imm_rnd = get_random_int();
8777                         rnd_hi32_patch[0] = insn;
8778                         rnd_hi32_patch[1].imm = imm_rnd;
8779                         rnd_hi32_patch[3].dst_reg = insn.dst_reg;
8780                         patch = rnd_hi32_patch;
8781                         patch_len = 4;
8782                         goto apply_patch_buffer;
8783                 }
8784
8785                 if (!bpf_jit_needs_zext())
8786                         continue;
8787
8788                 zext_patch[0] = insn;
8789                 zext_patch[1].dst_reg = insn.dst_reg;
8790                 zext_patch[1].src_reg = insn.dst_reg;
8791                 patch = zext_patch;
8792                 patch_len = 2;
8793 apply_patch_buffer:
8794                 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
8795                 if (!new_prog)
8796                         return -ENOMEM;
8797                 env->prog = new_prog;
8798                 insns = new_prog->insnsi;
8799                 aux = env->insn_aux_data;
8800                 delta += patch_len - 1;
8801         }
8802
8803         return 0;
8804 }
8805
8806 /* convert load instructions that access fields of a context type into a
8807  * sequence of instructions that access fields of the underlying structure:
8808  *     struct __sk_buff    -> struct sk_buff
8809  *     struct bpf_sock_ops -> struct sock
8810  */
8811 static int convert_ctx_accesses(struct bpf_verifier_env *env)
8812 {
8813         const struct bpf_verifier_ops *ops = env->ops;
8814         int i, cnt, size, ctx_field_size, delta = 0;
8815         const int insn_cnt = env->prog->len;
8816         struct bpf_insn insn_buf[16], *insn;
8817         u32 target_size, size_default, off;
8818         struct bpf_prog *new_prog;
8819         enum bpf_access_type type;
8820         bool is_narrower_load;
8821
8822         if (ops->gen_prologue || env->seen_direct_write) {
8823                 if (!ops->gen_prologue) {
8824                         verbose(env, "bpf verifier is misconfigured\n");
8825                         return -EINVAL;
8826                 }
8827                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
8828                                         env->prog);
8829                 if (cnt >= ARRAY_SIZE(insn_buf)) {
8830                         verbose(env, "bpf verifier is misconfigured\n");
8831                         return -EINVAL;
8832                 } else if (cnt) {
8833                         new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
8834                         if (!new_prog)
8835                                 return -ENOMEM;
8836
8837                         env->prog = new_prog;
8838                         delta += cnt - 1;
8839                 }
8840         }
8841
8842         if (bpf_prog_is_dev_bound(env->prog->aux))
8843                 return 0;
8844
8845         insn = env->prog->insnsi + delta;
8846
8847         for (i = 0; i < insn_cnt; i++, insn++) {
8848                 bpf_convert_ctx_access_t convert_ctx_access;
8849                 bool ctx_access;
8850
8851                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
8852                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
8853                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
8854                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
8855                         type = BPF_READ;
8856                         ctx_access = true;
8857                 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
8858                            insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
8859                            insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
8860                            insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
8861                            insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
8862                            insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
8863                            insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
8864                            insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
8865                         type = BPF_WRITE;
8866                         ctx_access = BPF_CLASS(insn->code) == BPF_STX;
8867                 } else {
8868                         continue;
8869                 }
8870
8871                 if (type == BPF_WRITE &&
8872                     env->insn_aux_data[i + delta].sanitize_stack_spill) {
8873                         struct bpf_insn patch[] = {
8874                                 *insn,
8875                                 BPF_ST_NOSPEC(),
8876                         };
8877
8878                         cnt = ARRAY_SIZE(patch);
8879                         new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
8880                         if (!new_prog)
8881                                 return -ENOMEM;
8882
8883                         delta    += cnt - 1;
8884                         env->prog = new_prog;
8885                         insn      = new_prog->insnsi + i + delta;
8886                         continue;
8887                 }
8888
8889                 if (!ctx_access)
8890                         continue;
8891
8892                 switch (env->insn_aux_data[i + delta].ptr_type) {
8893                 case PTR_TO_CTX:
8894                         if (!ops->convert_ctx_access)
8895                                 continue;
8896                         convert_ctx_access = ops->convert_ctx_access;
8897                         break;
8898                 case PTR_TO_SOCKET:
8899                 case PTR_TO_SOCK_COMMON:
8900                         convert_ctx_access = bpf_sock_convert_ctx_access;
8901                         break;
8902                 case PTR_TO_TCP_SOCK:
8903                         convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
8904                         break;
8905                 case PTR_TO_XDP_SOCK:
8906                         convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
8907                         break;
8908                 default:
8909                         continue;
8910                 }
8911
8912                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
8913                 size = BPF_LDST_BYTES(insn);
8914
8915                 /* If the read access is a narrower load of the field,
8916                  * convert to a 4/8-byte load, to minimum program type specific
8917                  * convert_ctx_access changes. If conversion is successful,
8918                  * we will apply proper mask to the result.
8919                  */
8920                 is_narrower_load = size < ctx_field_size;
8921                 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
8922                 off = insn->off;
8923                 if (is_narrower_load) {
8924                         u8 size_code;
8925
8926                         if (type == BPF_WRITE) {
8927                                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
8928                                 return -EINVAL;
8929                         }
8930
8931                         size_code = BPF_H;
8932                         if (ctx_field_size == 4)
8933                                 size_code = BPF_W;
8934                         else if (ctx_field_size == 8)
8935                                 size_code = BPF_DW;
8936
8937                         insn->off = off & ~(size_default - 1);
8938                         insn->code = BPF_LDX | BPF_MEM | size_code;
8939                 }
8940
8941                 target_size = 0;
8942                 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
8943                                          &target_size);
8944                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
8945                     (ctx_field_size && !target_size)) {
8946                         verbose(env, "bpf verifier is misconfigured\n");
8947                         return -EINVAL;
8948                 }
8949
8950                 if (is_narrower_load && size < target_size) {
8951                         u8 shift = bpf_ctx_narrow_access_offset(
8952                                 off, size, size_default) * 8;
8953                         if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
8954                                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
8955                                 return -EINVAL;
8956                         }
8957                         if (ctx_field_size <= 4) {
8958                                 if (shift)
8959                                         insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
8960                                                                         insn->dst_reg,
8961                                                                         shift);
8962                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
8963                                                                 (1 << size * 8) - 1);
8964                         } else {
8965                                 if (shift)
8966                                         insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
8967                                                                         insn->dst_reg,
8968                                                                         shift);
8969                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
8970                                                                 (1ULL << size * 8) - 1);
8971                         }
8972                 }
8973
8974                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
8975                 if (!new_prog)
8976                         return -ENOMEM;
8977
8978                 delta += cnt - 1;
8979
8980                 /* keep walking new program and skip insns we just inserted */
8981                 env->prog = new_prog;
8982                 insn      = new_prog->insnsi + i + delta;
8983         }
8984
8985         return 0;
8986 }
8987
8988 static int jit_subprogs(struct bpf_verifier_env *env)
8989 {
8990         struct bpf_prog *prog = env->prog, **func, *tmp;
8991         int i, j, subprog_start, subprog_end = 0, len, subprog;
8992         struct bpf_insn *insn;
8993         void *old_bpf_func;
8994         int err;
8995
8996         if (env->subprog_cnt <= 1)
8997                 return 0;
8998
8999         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
9000                 if (insn->code != (BPF_JMP | BPF_CALL) ||
9001                     insn->src_reg != BPF_PSEUDO_CALL)
9002                         continue;
9003                 /* Upon error here we cannot fall back to interpreter but
9004                  * need a hard reject of the program. Thus -EFAULT is
9005                  * propagated in any case.
9006                  */
9007                 subprog = find_subprog(env, i + insn->imm + 1);
9008                 if (subprog < 0) {
9009                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
9010                                   i + insn->imm + 1);
9011                         return -EFAULT;
9012                 }
9013                 /* temporarily remember subprog id inside insn instead of
9014                  * aux_data, since next loop will split up all insns into funcs
9015                  */
9016                 insn->off = subprog;
9017                 /* remember original imm in case JIT fails and fallback
9018                  * to interpreter will be needed
9019                  */
9020                 env->insn_aux_data[i].call_imm = insn->imm;
9021                 /* point imm to __bpf_call_base+1 from JITs point of view */
9022                 insn->imm = 1;
9023         }
9024
9025         err = bpf_prog_alloc_jited_linfo(prog);
9026         if (err)
9027                 goto out_undo_insn;
9028
9029         err = -ENOMEM;
9030         func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
9031         if (!func)
9032                 goto out_undo_insn;
9033
9034         for (i = 0; i < env->subprog_cnt; i++) {
9035                 subprog_start = subprog_end;
9036                 subprog_end = env->subprog_info[i + 1].start;
9037
9038                 len = subprog_end - subprog_start;
9039                 /* BPF_PROG_RUN doesn't call subprogs directly,
9040                  * hence main prog stats include the runtime of subprogs.
9041                  * subprogs don't have IDs and not reachable via prog_get_next_id
9042                  * func[i]->aux->stats will never be accessed and stays NULL
9043                  */
9044                 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
9045                 if (!func[i])
9046                         goto out_free;
9047                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
9048                        len * sizeof(struct bpf_insn));
9049                 func[i]->type = prog->type;
9050                 func[i]->len = len;
9051                 if (bpf_prog_calc_tag(func[i]))
9052                         goto out_free;
9053                 func[i]->is_func = 1;
9054                 func[i]->aux->func_idx = i;
9055                 /* the btf and func_info will be freed only at prog->aux */
9056                 func[i]->aux->btf = prog->aux->btf;
9057                 func[i]->aux->func_info = prog->aux->func_info;
9058
9059                 /* Use bpf_prog_F_tag to indicate functions in stack traces.
9060                  * Long term would need debug info to populate names
9061                  */
9062                 func[i]->aux->name[0] = 'F';
9063                 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
9064                 func[i]->jit_requested = 1;
9065                 func[i]->aux->linfo = prog->aux->linfo;
9066                 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
9067                 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
9068                 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
9069                 func[i] = bpf_int_jit_compile(func[i]);
9070                 if (!func[i]->jited) {
9071                         err = -ENOTSUPP;
9072                         goto out_free;
9073                 }
9074                 cond_resched();
9075         }
9076         /* at this point all bpf functions were successfully JITed
9077          * now populate all bpf_calls with correct addresses and
9078          * run last pass of JIT
9079          */
9080         for (i = 0; i < env->subprog_cnt; i++) {
9081                 insn = func[i]->insnsi;
9082                 for (j = 0; j < func[i]->len; j++, insn++) {
9083                         if (insn->code != (BPF_JMP | BPF_CALL) ||
9084                             insn->src_reg != BPF_PSEUDO_CALL)
9085                                 continue;
9086                         subprog = insn->off;
9087                         insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
9088                                     __bpf_call_base;
9089                 }
9090
9091                 /* we use the aux data to keep a list of the start addresses
9092                  * of the JITed images for each function in the program
9093                  *
9094                  * for some architectures, such as powerpc64, the imm field
9095                  * might not be large enough to hold the offset of the start
9096                  * address of the callee's JITed image from __bpf_call_base
9097                  *
9098                  * in such cases, we can lookup the start address of a callee
9099                  * by using its subprog id, available from the off field of
9100                  * the call instruction, as an index for this list
9101                  */
9102                 func[i]->aux->func = func;
9103                 func[i]->aux->func_cnt = env->subprog_cnt;
9104         }
9105         for (i = 0; i < env->subprog_cnt; i++) {
9106                 old_bpf_func = func[i]->bpf_func;
9107                 tmp = bpf_int_jit_compile(func[i]);
9108                 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
9109                         verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
9110                         err = -ENOTSUPP;
9111                         goto out_free;
9112                 }
9113                 cond_resched();
9114         }
9115
9116         /* finally lock prog and jit images for all functions and
9117          * populate kallsysm
9118          */
9119         for (i = 0; i < env->subprog_cnt; i++) {
9120                 bpf_prog_lock_ro(func[i]);
9121                 bpf_prog_kallsyms_add(func[i]);
9122         }
9123
9124         /* Last step: make now unused interpreter insns from main
9125          * prog consistent for later dump requests, so they can
9126          * later look the same as if they were interpreted only.
9127          */
9128         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
9129                 if (insn->code != (BPF_JMP | BPF_CALL) ||
9130                     insn->src_reg != BPF_PSEUDO_CALL)
9131                         continue;
9132                 insn->off = env->insn_aux_data[i].call_imm;
9133                 subprog = find_subprog(env, i + insn->off + 1);
9134                 insn->imm = subprog;
9135         }
9136
9137         prog->jited = 1;
9138         prog->bpf_func = func[0]->bpf_func;
9139         prog->aux->func = func;
9140         prog->aux->func_cnt = env->subprog_cnt;
9141         bpf_prog_free_unused_jited_linfo(prog);
9142         return 0;
9143 out_free:
9144         for (i = 0; i < env->subprog_cnt; i++)
9145                 if (func[i])
9146                         bpf_jit_free(func[i]);
9147         kfree(func);
9148 out_undo_insn:
9149         /* cleanup main prog to be interpreted */
9150         prog->jit_requested = 0;
9151         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
9152                 if (insn->code != (BPF_JMP | BPF_CALL) ||
9153                     insn->src_reg != BPF_PSEUDO_CALL)
9154                         continue;
9155                 insn->off = 0;
9156                 insn->imm = env->insn_aux_data[i].call_imm;
9157         }
9158         bpf_prog_free_jited_linfo(prog);
9159         return err;
9160 }
9161
9162 static int fixup_call_args(struct bpf_verifier_env *env)
9163 {
9164 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
9165         struct bpf_prog *prog = env->prog;
9166         struct bpf_insn *insn = prog->insnsi;
9167         int i, depth;
9168 #endif
9169         int err = 0;
9170
9171         if (env->prog->jit_requested &&
9172             !bpf_prog_is_dev_bound(env->prog->aux)) {
9173                 err = jit_subprogs(env);
9174                 if (err == 0)
9175                         return 0;
9176                 if (err == -EFAULT)
9177                         return err;
9178         }
9179 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
9180         for (i = 0; i < prog->len; i++, insn++) {
9181                 if (insn->code != (BPF_JMP | BPF_CALL) ||
9182                     insn->src_reg != BPF_PSEUDO_CALL)
9183                         continue;
9184                 depth = get_callee_stack_depth(env, insn, i);
9185                 if (depth < 0)
9186                         return depth;
9187                 bpf_patch_call_args(insn, depth);
9188         }
9189         err = 0;
9190 #endif
9191         return err;
9192 }
9193
9194 /* fixup insn->imm field of bpf_call instructions
9195  * and inline eligible helpers as explicit sequence of BPF instructions
9196  *
9197  * this function is called after eBPF program passed verification
9198  */
9199 static int fixup_bpf_calls(struct bpf_verifier_env *env)
9200 {
9201         struct bpf_prog *prog = env->prog;
9202         struct bpf_insn *insn = prog->insnsi;
9203         const struct bpf_func_proto *fn;
9204         const int insn_cnt = prog->len;
9205         const struct bpf_map_ops *ops;
9206         struct bpf_insn_aux_data *aux;
9207         struct bpf_insn insn_buf[16];
9208         struct bpf_prog *new_prog;
9209         struct bpf_map *map_ptr;
9210         int i, cnt, delta = 0;
9211
9212         for (i = 0; i < insn_cnt; i++, insn++) {
9213                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
9214                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
9215                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
9216                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
9217                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
9218                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
9219                         struct bpf_insn *patchlet;
9220                         struct bpf_insn chk_and_div[] = {
9221                                 /* [R,W]x div 0 -> 0 */
9222                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
9223                                              BPF_JNE | BPF_K, insn->src_reg,
9224                                              0, 2, 0),
9225                                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
9226                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
9227                                 *insn,
9228                         };
9229                         struct bpf_insn chk_and_mod[] = {
9230                                 /* [R,W]x mod 0 -> [R,W]x */
9231                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
9232                                              BPF_JEQ | BPF_K, insn->src_reg,
9233                                              0, 1 + (is64 ? 0 : 1), 0),
9234                                 *insn,
9235                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
9236                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
9237                         };
9238
9239                         patchlet = isdiv ? chk_and_div : chk_and_mod;
9240                         cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
9241                                       ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
9242
9243                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
9244                         if (!new_prog)
9245                                 return -ENOMEM;
9246
9247                         delta    += cnt - 1;
9248                         env->prog = prog = new_prog;
9249                         insn      = new_prog->insnsi + i + delta;
9250                         continue;
9251                 }
9252
9253                 if (BPF_CLASS(insn->code) == BPF_LD &&
9254                     (BPF_MODE(insn->code) == BPF_ABS ||
9255                      BPF_MODE(insn->code) == BPF_IND)) {
9256                         cnt = env->ops->gen_ld_abs(insn, insn_buf);
9257                         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
9258                                 verbose(env, "bpf verifier is misconfigured\n");
9259                                 return -EINVAL;
9260                         }
9261
9262                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9263                         if (!new_prog)
9264                                 return -ENOMEM;
9265
9266                         delta    += cnt - 1;
9267                         env->prog = prog = new_prog;
9268                         insn      = new_prog->insnsi + i + delta;
9269                         continue;
9270                 }
9271
9272                 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
9273                     insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
9274                         const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
9275                         const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
9276                         struct bpf_insn insn_buf[16];
9277                         struct bpf_insn *patch = &insn_buf[0];
9278                         bool issrc, isneg, isimm;
9279                         u32 off_reg;
9280
9281                         aux = &env->insn_aux_data[i + delta];
9282                         if (!aux->alu_state ||
9283                             aux->alu_state == BPF_ALU_NON_POINTER)
9284                                 continue;
9285
9286                         isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
9287                         issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
9288                                 BPF_ALU_SANITIZE_SRC;
9289                         isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
9290
9291                         off_reg = issrc ? insn->src_reg : insn->dst_reg;
9292                         if (isimm) {
9293                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
9294                         } else {
9295                                 if (isneg)
9296                                         *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
9297                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
9298                                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
9299                                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
9300                                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
9301                                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
9302                                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
9303                         }
9304                         if (!issrc)
9305                                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
9306                         insn->src_reg = BPF_REG_AX;
9307                         if (isneg)
9308                                 insn->code = insn->code == code_add ?
9309                                              code_sub : code_add;
9310                         *patch++ = *insn;
9311                         if (issrc && isneg && !isimm)
9312                                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
9313                         cnt = patch - insn_buf;
9314
9315                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9316                         if (!new_prog)
9317                                 return -ENOMEM;
9318
9319                         delta    += cnt - 1;
9320                         env->prog = prog = new_prog;
9321                         insn      = new_prog->insnsi + i + delta;
9322                         continue;
9323                 }
9324
9325                 if (insn->code != (BPF_JMP | BPF_CALL))
9326                         continue;
9327                 if (insn->src_reg == BPF_PSEUDO_CALL)
9328                         continue;
9329
9330                 if (insn->imm == BPF_FUNC_get_route_realm)
9331                         prog->dst_needed = 1;
9332                 if (insn->imm == BPF_FUNC_get_prandom_u32)
9333                         bpf_user_rnd_init_once();
9334                 if (insn->imm == BPF_FUNC_override_return)
9335                         prog->kprobe_override = 1;
9336                 if (insn->imm == BPF_FUNC_tail_call) {
9337                         /* If we tail call into other programs, we
9338                          * cannot make any assumptions since they can
9339                          * be replaced dynamically during runtime in
9340                          * the program array.
9341                          */
9342                         prog->cb_access = 1;
9343                         env->prog->aux->stack_depth = MAX_BPF_STACK;
9344                         env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
9345
9346                         /* mark bpf_tail_call as different opcode to avoid
9347                          * conditional branch in the interpeter for every normal
9348                          * call and to prevent accidental JITing by JIT compiler
9349                          * that doesn't support bpf_tail_call yet
9350                          */
9351                         insn->imm = 0;
9352                         insn->code = BPF_JMP | BPF_TAIL_CALL;
9353
9354                         aux = &env->insn_aux_data[i + delta];
9355                         if (!bpf_map_ptr_unpriv(aux))
9356                                 continue;
9357
9358                         /* instead of changing every JIT dealing with tail_call
9359                          * emit two extra insns:
9360                          * if (index >= max_entries) goto out;
9361                          * index &= array->index_mask;
9362                          * to avoid out-of-bounds cpu speculation
9363                          */
9364                         if (bpf_map_ptr_poisoned(aux)) {
9365                                 verbose(env, "tail_call abusing map_ptr\n");
9366                                 return -EINVAL;
9367                         }
9368
9369                         map_ptr = BPF_MAP_PTR(aux->map_state);
9370                         insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
9371                                                   map_ptr->max_entries, 2);
9372                         insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
9373                                                     container_of(map_ptr,
9374                                                                  struct bpf_array,
9375                                                                  map)->index_mask);
9376                         insn_buf[2] = *insn;
9377                         cnt = 3;
9378                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9379                         if (!new_prog)
9380                                 return -ENOMEM;
9381
9382                         delta    += cnt - 1;
9383                         env->prog = prog = new_prog;
9384                         insn      = new_prog->insnsi + i + delta;
9385                         continue;
9386                 }
9387
9388                 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
9389                  * and other inlining handlers are currently limited to 64 bit
9390                  * only.
9391                  */
9392                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
9393                     (insn->imm == BPF_FUNC_map_lookup_elem ||
9394                      insn->imm == BPF_FUNC_map_update_elem ||
9395                      insn->imm == BPF_FUNC_map_delete_elem ||
9396                      insn->imm == BPF_FUNC_map_push_elem   ||
9397                      insn->imm == BPF_FUNC_map_pop_elem    ||
9398                      insn->imm == BPF_FUNC_map_peek_elem)) {
9399                         aux = &env->insn_aux_data[i + delta];
9400                         if (bpf_map_ptr_poisoned(aux))
9401                                 goto patch_call_imm;
9402
9403                         map_ptr = BPF_MAP_PTR(aux->map_state);
9404                         ops = map_ptr->ops;
9405                         if (insn->imm == BPF_FUNC_map_lookup_elem &&
9406                             ops->map_gen_lookup) {
9407                                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
9408                                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
9409                                         verbose(env, "bpf verifier is misconfigured\n");
9410                                         return -EINVAL;
9411                                 }
9412
9413                                 new_prog = bpf_patch_insn_data(env, i + delta,
9414                                                                insn_buf, cnt);
9415                                 if (!new_prog)
9416                                         return -ENOMEM;
9417
9418                                 delta    += cnt - 1;
9419                                 env->prog = prog = new_prog;
9420                                 insn      = new_prog->insnsi + i + delta;
9421                                 continue;
9422                         }
9423
9424                         BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
9425                                      (void *(*)(struct bpf_map *map, void *key))NULL));
9426                         BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
9427                                      (int (*)(struct bpf_map *map, void *key))NULL));
9428                         BUILD_BUG_ON(!__same_type(ops->map_update_elem,
9429                                      (int (*)(struct bpf_map *map, void *key, void *value,
9430                                               u64 flags))NULL));
9431                         BUILD_BUG_ON(!__same_type(ops->map_push_elem,
9432                                      (int (*)(struct bpf_map *map, void *value,
9433                                               u64 flags))NULL));
9434                         BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
9435                                      (int (*)(struct bpf_map *map, void *value))NULL));
9436                         BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
9437                                      (int (*)(struct bpf_map *map, void *value))NULL));
9438
9439                         switch (insn->imm) {
9440                         case BPF_FUNC_map_lookup_elem:
9441                                 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
9442                                             __bpf_call_base;
9443                                 continue;
9444                         case BPF_FUNC_map_update_elem:
9445                                 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
9446                                             __bpf_call_base;
9447                                 continue;
9448                         case BPF_FUNC_map_delete_elem:
9449                                 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
9450                                             __bpf_call_base;
9451                                 continue;
9452                         case BPF_FUNC_map_push_elem:
9453                                 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
9454                                             __bpf_call_base;
9455                                 continue;
9456                         case BPF_FUNC_map_pop_elem:
9457                                 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
9458                                             __bpf_call_base;
9459                                 continue;
9460                         case BPF_FUNC_map_peek_elem:
9461                                 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
9462                                             __bpf_call_base;
9463                                 continue;
9464                         }
9465
9466                         goto patch_call_imm;
9467                 }
9468
9469 patch_call_imm:
9470                 fn = env->ops->get_func_proto(insn->imm, env->prog);
9471                 /* all functions that have prototype and verifier allowed
9472                  * programs to call them, must be real in-kernel functions
9473                  */
9474                 if (!fn->func) {
9475                         verbose(env,
9476                                 "kernel subsystem misconfigured func %s#%d\n",
9477                                 func_id_name(insn->imm), insn->imm);
9478                         return -EFAULT;
9479                 }
9480                 insn->imm = fn->func - __bpf_call_base;
9481         }
9482
9483         return 0;
9484 }
9485
9486 static void free_states(struct bpf_verifier_env *env)
9487 {
9488         struct bpf_verifier_state_list *sl, *sln;
9489         int i;
9490
9491         sl = env->free_list;
9492         while (sl) {
9493                 sln = sl->next;
9494                 free_verifier_state(&sl->state, false);
9495                 kfree(sl);
9496                 sl = sln;
9497         }
9498
9499         if (!env->explored_states)
9500                 return;
9501
9502         for (i = 0; i < state_htab_size(env); i++) {
9503                 sl = env->explored_states[i];
9504
9505                 while (sl) {
9506                         sln = sl->next;
9507                         free_verifier_state(&sl->state, false);
9508                         kfree(sl);
9509                         sl = sln;
9510                 }
9511         }
9512
9513         kvfree(env->explored_states);
9514 }
9515
9516 static void print_verification_stats(struct bpf_verifier_env *env)
9517 {
9518         int i;
9519
9520         if (env->log.level & BPF_LOG_STATS) {
9521                 verbose(env, "verification time %lld usec\n",
9522                         div_u64(env->verification_time, 1000));
9523                 verbose(env, "stack depth ");
9524                 for (i = 0; i < env->subprog_cnt; i++) {
9525                         u32 depth = env->subprog_info[i].stack_depth;
9526
9527                         verbose(env, "%d", depth);
9528                         if (i + 1 < env->subprog_cnt)
9529                                 verbose(env, "+");
9530                 }
9531                 verbose(env, "\n");
9532         }
9533         verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
9534                 "total_states %d peak_states %d mark_read %d\n",
9535                 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
9536                 env->max_states_per_insn, env->total_states,
9537                 env->peak_states, env->longest_mark_read_walk);
9538 }
9539
9540 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
9541               union bpf_attr __user *uattr)
9542 {
9543         u64 start_time = ktime_get_ns();
9544         struct bpf_verifier_env *env;
9545         struct bpf_verifier_log *log;
9546         int i, len, ret = -EINVAL;
9547         bool is_priv;
9548
9549         /* no program is valid */
9550         if (ARRAY_SIZE(bpf_verifier_ops) == 0)
9551                 return -EINVAL;
9552
9553         /* 'struct bpf_verifier_env' can be global, but since it's not small,
9554          * allocate/free it every time bpf_check() is called
9555          */
9556         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
9557         if (!env)
9558                 return -ENOMEM;
9559         log = &env->log;
9560
9561         len = (*prog)->len;
9562         env->insn_aux_data =
9563                 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
9564         ret = -ENOMEM;
9565         if (!env->insn_aux_data)
9566                 goto err_free_env;
9567         for (i = 0; i < len; i++)
9568                 env->insn_aux_data[i].orig_idx = i;
9569         env->prog = *prog;
9570         env->ops = bpf_verifier_ops[env->prog->type];
9571         is_priv = capable(CAP_SYS_ADMIN);
9572
9573         /* grab the mutex to protect few globals used by verifier */
9574         if (!is_priv)
9575                 mutex_lock(&bpf_verifier_lock);
9576
9577         if (attr->log_level || attr->log_buf || attr->log_size) {
9578                 /* user requested verbose verifier output
9579                  * and supplied buffer to store the verification trace
9580                  */
9581                 log->level = attr->log_level;
9582                 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
9583                 log->len_total = attr->log_size;
9584
9585                 ret = -EINVAL;
9586                 /* log attributes have to be sane */
9587                 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
9588                     !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
9589                         goto err_unlock;
9590         }
9591
9592         env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
9593         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
9594                 env->strict_alignment = true;
9595         if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
9596                 env->strict_alignment = false;
9597
9598         env->allow_ptr_leaks = is_priv;
9599
9600         if (is_priv)
9601                 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
9602
9603         ret = replace_map_fd_with_map_ptr(env);
9604         if (ret < 0)
9605                 goto skip_full_check;
9606
9607         if (bpf_prog_is_dev_bound(env->prog->aux)) {
9608                 ret = bpf_prog_offload_verifier_prep(env->prog);
9609                 if (ret)
9610                         goto skip_full_check;
9611         }
9612
9613         env->explored_states = kvcalloc(state_htab_size(env),
9614                                        sizeof(struct bpf_verifier_state_list *),
9615                                        GFP_USER);
9616         ret = -ENOMEM;
9617         if (!env->explored_states)
9618                 goto skip_full_check;
9619
9620         ret = check_subprogs(env);
9621         if (ret < 0)
9622                 goto skip_full_check;
9623
9624         ret = check_btf_info(env, attr, uattr);
9625         if (ret < 0)
9626                 goto skip_full_check;
9627
9628         ret = check_cfg(env);
9629         if (ret < 0)
9630                 goto skip_full_check;
9631
9632         ret = do_check(env);
9633         if (env->cur_state) {
9634                 free_verifier_state(env->cur_state, true);
9635                 env->cur_state = NULL;
9636         }
9637
9638         if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
9639                 ret = bpf_prog_offload_finalize(env);
9640
9641 skip_full_check:
9642         while (!pop_stack(env, NULL, NULL));
9643         free_states(env);
9644
9645         if (ret == 0)
9646                 ret = check_max_stack_depth(env);
9647
9648         /* instruction rewrites happen after this point */
9649         if (is_priv) {
9650                 if (ret == 0)
9651                         opt_hard_wire_dead_code_branches(env);
9652                 if (ret == 0)
9653                         ret = opt_remove_dead_code(env);
9654                 if (ret == 0)
9655                         ret = opt_remove_nops(env);
9656         } else {
9657                 if (ret == 0)
9658                         sanitize_dead_code(env);
9659         }
9660
9661         if (ret == 0)
9662                 /* program is valid, convert *(u32*)(ctx + off) accesses */
9663                 ret = convert_ctx_accesses(env);
9664
9665         if (ret == 0)
9666                 ret = fixup_bpf_calls(env);
9667
9668         /* do 32-bit optimization after insn patching has done so those patched
9669          * insns could be handled correctly.
9670          */
9671         if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
9672                 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
9673                 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
9674                                                                      : false;
9675         }
9676
9677         if (ret == 0)
9678                 ret = fixup_call_args(env);
9679
9680         env->verification_time = ktime_get_ns() - start_time;
9681         print_verification_stats(env);
9682
9683         if (log->level && bpf_verifier_log_full(log))
9684                 ret = -ENOSPC;
9685         if (log->level && !log->ubuf) {
9686                 ret = -EFAULT;
9687                 goto err_release_maps;
9688         }
9689
9690         if (ret == 0 && env->used_map_cnt) {
9691                 /* if program passed verifier, update used_maps in bpf_prog_info */
9692                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
9693                                                           sizeof(env->used_maps[0]),
9694                                                           GFP_KERNEL);
9695
9696                 if (!env->prog->aux->used_maps) {
9697                         ret = -ENOMEM;
9698                         goto err_release_maps;
9699                 }
9700
9701                 memcpy(env->prog->aux->used_maps, env->used_maps,
9702                        sizeof(env->used_maps[0]) * env->used_map_cnt);
9703                 env->prog->aux->used_map_cnt = env->used_map_cnt;
9704
9705                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
9706                  * bpf_ld_imm64 instructions
9707                  */
9708                 convert_pseudo_ld_imm64(env);
9709         }
9710
9711         if (ret == 0)
9712                 adjust_btf_func(env);
9713
9714 err_release_maps:
9715         if (!env->prog->aux->used_maps)
9716                 /* if we didn't copy map pointers into bpf_prog_info, release
9717                  * them now. Otherwise free_used_maps() will release them.
9718                  */
9719                 release_maps(env);
9720         *prog = env->prog;
9721 err_unlock:
9722         if (!is_priv)
9723                 mutex_unlock(&bpf_verifier_lock);
9724         vfree(env->insn_aux_data);
9725 err_free_env:
9726         kfree(env);
9727         return ret;
9728 }