// SPDX-License-Identifier: GPL-2.0-only
/*
 * BPF JIT compiler for LoongArch
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */
#include "bpf_jit.h"

#define REG_TCC		LOONGARCH_GPR_A6
#define TCC_SAVED	LOONGARCH_GPR_S5

#define SAVE_RA		BIT(0)
#define SAVE_TCC	BIT(1)
static const int regmap[] = {
	/* return value from in-kernel function, and exit value for eBPF program */
	[BPF_REG_0] = LOONGARCH_GPR_A5,
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = LOONGARCH_GPR_A0,
	[BPF_REG_2] = LOONGARCH_GPR_A1,
	[BPF_REG_3] = LOONGARCH_GPR_A2,
	[BPF_REG_4] = LOONGARCH_GPR_A3,
	[BPF_REG_5] = LOONGARCH_GPR_A4,
	/* callee-saved registers that in-kernel function will preserve */
	[BPF_REG_6] = LOONGARCH_GPR_S0,
	[BPF_REG_7] = LOONGARCH_GPR_S1,
	[BPF_REG_8] = LOONGARCH_GPR_S2,
	[BPF_REG_9] = LOONGARCH_GPR_S3,
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = LOONGARCH_GPR_S4,
	/* temporary register for blinding constants */
	[BPF_REG_AX] = LOONGARCH_GPR_T0,
};
static void mark_call(struct jit_ctx *ctx)
{
	ctx->flags |= SAVE_RA;
}

static void mark_tail_call(struct jit_ctx *ctx)
{
	ctx->flags |= SAVE_TCC;
}

static bool seen_call(struct jit_ctx *ctx)
{
	return (ctx->flags & SAVE_RA);
}

static bool seen_tail_call(struct jit_ctx *ctx)
{
	return (ctx->flags & SAVE_TCC);
}

static u8 tail_call_reg(struct jit_ctx *ctx)
{
	if (seen_call(ctx))
		return TCC_SAVED;

	return REG_TCC;
}
/*
 * eBPF prog stack layout:
 *
 *                                        high
 * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
 *                            |           $ra           |
 *                            +-------------------------+
 *                            |           $fp           |
 *                            +-------------------------+
 *                            |           $s0           |
 *                            +-------------------------+
 *                            |           $s1           |
 *                            +-------------------------+
 *                            |           $s2           |
 *                            +-------------------------+
 *                            |           $s3           |
 *                            +-------------------------+
 *                            |           $s4           |
 *                            +-------------------------+
 *                            |           $s5           |
 *                            +-------------------------+ <--BPF_REG_FP
 *                            |  prog->aux->stack_depth |
 *                            |        (optional)       |
 * current $sp -------------> +-------------------------+
 *                                        low
 */
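/*
 * A worked example of the adjustment below (assuming a hypothetical
 * prog->aux->stack_depth of 24): the BPF stack rounds up to 32 bytes, the
 * eight saved GPRs take 64 bytes (already 16-byte aligned), so stack_adjust
 * becomes 96 and BPF_REG_FP ends up at $sp + 32 after the prologue.
 */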
static void build_prologue(struct jit_ctx *ctx)
{
	int stack_adjust = 0, store_offset, bpf_stack_adjust;

	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);

	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
	stack_adjust += sizeof(long) * 8;

	stack_adjust = round_up(stack_adjust, 16);
	stack_adjust += bpf_stack_adjust;

	/*
	 * First instruction initializes the tail call count (TCC).
	 * On tail call we skip this instruction, and the TCC is
	 * passed in REG_TCC from the caller.
	 */
	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);

	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);

	store_offset = stack_adjust - sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);

	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);

	if (bpf_stack_adjust)
		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);

	/*
	 * Program contains calls and tail calls, so REG_TCC needs
	 * to be saved across calls.
	 */
	if (seen_tail_call(ctx) && seen_call(ctx))
		move_reg(ctx, TCC_SAVED, REG_TCC);

	ctx->stack_size = stack_adjust;
}
static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
{
	int stack_adjust = ctx->stack_size;
	int load_offset;

	load_offset = stack_adjust - sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);

	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);

	if (!is_tail_call) {
		/* Set return value */
		move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
		/* Return to the caller */
		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
	} else {
		/*
		 * Call the next bpf prog and skip the first instruction
		 * of TCC initialization.
		 */
		emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
	}
}

static void build_epilogue(struct jit_ctx *ctx)
{
	__build_epilogue(ctx, false);
}
bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

/* initialized on the first pass of build_body() */
static int out_offset = -1;
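/*
 * Note: out_offset records, in instructions from the start of the emitted
 * tail-call sequence, where the fall-through "out" point lands, so that the
 * three guard branches below share one target and the sequence keeps the
 * same length on every JIT pass.
 */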
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	int off;
	u8 tcc = tail_call_reg(ctx);
	u8 a1 = LOONGARCH_GPR_A1;
	u8 a2 = LOONGARCH_GPR_A2;
	u8 t1 = LOONGARCH_GPR_T1;
	u8 t2 = LOONGARCH_GPR_T2;
	u8 t3 = LOONGARCH_GPR_T3;
	const int idx0 = ctx->idx;

#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))

	/*
	 * a0: &ctx
	 * a1: &array
	 * a2: index
	 *
	 * if (index >= array->map.max_entries)
	 *	 goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_insn(ctx, ldwu, t1, a1, off);
	/* bgeu $a2, $t1, jmp_offset */
	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
		goto toofar;

	/*
	 * if (--TCC < 0)
	 *	 goto out;
	 */
	emit_insn(ctx, addid, REG_TCC, tcc, -1);
	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
		goto toofar;

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *	 goto out;
	 */
	emit_insn(ctx, alsld, t2, a2, a1, 2);
	off = offsetof(struct bpf_array, ptrs);
	emit_insn(ctx, ldd, t2, t2, off);
	/* beq $t2, $zero, jmp_offset */
	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
		goto toofar;

	/* goto *(prog->bpf_func + 4); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_insn(ctx, ldd, t3, t2, off);
	__build_epilogue(ctx, true);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}

	return 0;

toofar:
	pr_info_once("tail_call: jump too far\n");
	return -1;
}
static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 t1 = LOONGARCH_GPR_T1;
	const u8 t2 = LOONGARCH_GPR_T2;
	const u8 t3 = LOONGARCH_GPR_T3;
	const u8 r0 = regmap[BPF_REG_0];
	const u8 src = regmap[insn->src_reg];
	const u8 dst = regmap[insn->dst_reg];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;

	move_imm(ctx, t1, off, false);
	emit_insn(ctx, addd, t1, dst, t1);
	move_reg(ctx, t3, src);

	switch (imm) {
	/* lock *(size *)(dst + off) <op>= src */
	case BPF_ADD:
		if (isdw)
			emit_insn(ctx, amaddd, t2, t1, src);
		else
			emit_insn(ctx, amaddw, t2, t1, src);
		break;
	case BPF_AND:
		if (isdw)
			emit_insn(ctx, amandd, t2, t1, src);
		else
			emit_insn(ctx, amandw, t2, t1, src);
		break;
	case BPF_OR:
		if (isdw)
			emit_insn(ctx, amord, t2, t1, src);
		else
			emit_insn(ctx, amorw, t2, t1, src);
		break;
	case BPF_XOR:
		if (isdw)
			emit_insn(ctx, amxord, t2, t1, src);
		else
			emit_insn(ctx, amxorw, t2, t1, src);
		break;
	/* src = atomic_fetch_<op>(dst + off, src) */
	case BPF_ADD | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amaddd, src, t1, t3);
		} else {
			emit_insn(ctx, amaddw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_AND | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amandd, src, t1, t3);
		} else {
			emit_insn(ctx, amandw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_OR | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amord, src, t1, t3);
		} else {
			emit_insn(ctx, amorw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_XOR | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amxord, src, t1, t3);
		} else {
			emit_insn(ctx, amxorw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	/* src = atomic_xchg(dst + off, src); */
	case BPF_XCHG:
		if (isdw) {
			emit_insn(ctx, amswapd, src, t1, t3);
		} else {
			emit_insn(ctx, amswapw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
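	/*
	 * Note on the BPF_CMPXCHG case below: it is open-coded as an LL/SC
	 * loop (ll.d/sc.d or ll.w/sc.w). The bne/beq immediates count
	 * instructions, so the trailing "beq t3, $zero, -4" (-6 for the
	 * 32-bit variant) retries from the load-linked when the
	 * store-conditional fails.
	 */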
	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
	case BPF_CMPXCHG:
		move_reg(ctx, t2, r0);
		if (isdw) {
			emit_insn(ctx, lld, r0, t1, 0);
			emit_insn(ctx, bne, t2, r0, 4);
			move_reg(ctx, t3, src);
			emit_insn(ctx, scd, t3, t1, 0);
			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
		} else {
			emit_insn(ctx, llw, r0, t1, 0);
			emit_zext_32(ctx, t2, true);
			emit_zext_32(ctx, r0, true);
			emit_insn(ctx, bne, t2, r0, 4);
			move_reg(ctx, t3, src);
			emit_insn(ctx, scw, t3, t1, 0);
			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
			emit_zext_32(ctx, r0, true);
		}
		break;
	}
}
static bool is_signed_bpf_cond(u8 cond)
{
	return cond == BPF_JSGT || cond == BPF_JSLT ||
	       cond == BPF_JSGE || cond == BPF_JSLE;
}

#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);

	regs->regs[dst_reg] = 0;
	regs->csr_era = (unsigned long)&ex->fixup - offset;

	return true;
}
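/*
 * add_exception_handler() below stores offset = &ex->fixup - (pc + 4), so
 * "&ex->fixup - offset" above resumes at the instruction right after the
 * faulting load, with the destination register already cleared to zero.
 */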
/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
				 struct jit_ctx *ctx,
				 int dst_reg)
{
	unsigned long pc;
	off_t offset;
	struct exception_table_entry *ex;

	if (!ctx->image || !ctx->prog->aux->extable)
		return 0;

	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
		return 0;

	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	ex = &ctx->prog->aux->extable[ctx->num_exentries];
	pc = (unsigned long)&ctx->image[ctx->idx - 1];

	offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;

	ex->insn = offset;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 */
	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
		return -ERANGE;

	ex->type = EX_TYPE_BPF;
	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

	ctx->num_exentries++;

	return 0;
}
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
{
	u8 tm = -1;
	u64 func_addr;
	bool func_addr_fixed, sign_extend;
	int i = insn - ctx->prog->insnsi;
	int ret, jmp_offset;
	const u8 code = insn->code;
	const u8 cond = BPF_OP(code);
	const u8 t1 = LOONGARCH_GPR_T1;
	const u8 t2 = LOONGARCH_GPR_T2;
	const u8 src = regmap[insn->src_reg];
	const u8 dst = regmap[insn->dst_reg];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		switch (off) {
		case 0:
			move_reg(ctx, dst, src);
			emit_zext_32(ctx, dst, is32);
			break;
		case 8:
			move_reg(ctx, t1, src);
			emit_insn(ctx, extwb, dst, t1);
			emit_zext_32(ctx, dst, is32);
			break;
		case 16:
			move_reg(ctx, t1, src);
			emit_insn(ctx, extwh, dst, t1);
			emit_zext_32(ctx, dst, is32);
			break;
		case 32:
			emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
			break;
		}
		break;
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		move_imm(ctx, dst, imm, is32);
		break;

	/* dst = dst + src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit_insn(ctx, addd, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst + imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		if (is_signed_imm12(imm)) {
			emit_insn(ctx, addid, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, addd, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;
	/* dst = dst - src */
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit_insn(ctx, subd, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst - imm */
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (is_signed_imm12(-imm)) {
			emit_insn(ctx, addid, dst, dst, -imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, subd, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst * src */
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit_insn(ctx, muld, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst * imm */
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, muld, dst, dst, t1);
		emit_zext_32(ctx, dst, is32);
		break;
	/* dst = dst / src */
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		if (!off) {
			emit_zext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_zext_32(ctx, t1, is32);
			emit_insn(ctx, divdu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			emit_sext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_sext_32(ctx, t1, is32);
			emit_insn(ctx, divd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = dst / imm */
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		if (!off) {
			move_imm(ctx, t1, imm, is32);
			emit_zext_32(ctx, dst, is32);
			emit_insn(ctx, divdu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			move_imm(ctx, t1, imm, false);
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, dst, is32);
			emit_insn(ctx, divd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;
	/* dst = dst % src */
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		if (!off) {
			emit_zext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_zext_32(ctx, t1, is32);
			emit_insn(ctx, moddu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			emit_sext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_sext_32(ctx, t1, is32);
			emit_insn(ctx, modd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = dst % imm */
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		if (!off) {
			move_imm(ctx, t1, imm, is32);
			emit_zext_32(ctx, dst, is32);
			emit_insn(ctx, moddu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			move_imm(ctx, t1, imm, false);
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, dst, is32);
			emit_insn(ctx, modd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst & src */
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit_insn(ctx, and, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst & imm */
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, andi, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, and, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst | src */
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit_insn(ctx, or, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst | imm */
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, ori, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, or, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst ^ src */
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit_insn(ctx, xor, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst ^ imm */
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, xori, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, xor, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;
	/* dst = dst << src (logical) */
	case BPF_ALU | BPF_LSH | BPF_X:
		emit_insn(ctx, sllw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit_insn(ctx, slld, dst, dst, src);
		break;

	/* dst = dst << imm (logical) */
	case BPF_ALU | BPF_LSH | BPF_K:
		emit_insn(ctx, slliw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit_insn(ctx, sllid, dst, dst, imm);
		break;

	/* dst = dst >> src (logical) */
	case BPF_ALU | BPF_RSH | BPF_X:
		emit_insn(ctx, srlw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit_insn(ctx, srld, dst, dst, src);
		break;

	/* dst = dst >> imm (logical) */
	case BPF_ALU | BPF_RSH | BPF_K:
		emit_insn(ctx, srliw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit_insn(ctx, srlid, dst, dst, imm);
		break;

	/* dst = dst >> src (arithmetic) */
	case BPF_ALU | BPF_ARSH | BPF_X:
		emit_insn(ctx, sraw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_insn(ctx, srad, dst, dst, src);
		break;

	/* dst = dst >> imm (arithmetic) */
	case BPF_ALU | BPF_ARSH | BPF_K:
		emit_insn(ctx, sraiw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_insn(ctx, sraid, dst, dst, imm);
		break;
	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit_zext_32(ctx, dst, is32);
			break;
		case 64:
			/* do nothing */
			break;
		}
		break;

	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			emit_insn(ctx, revb2h, dst, dst);
			/* zero-extend 16 bits into 64 bits */
			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
			break;
		case 32:
			emit_insn(ctx, revb2w, dst, dst);
			/* clear the upper 32 bits */
			emit_zext_32(ctx, dst, true);
			break;
		case 64:
			emit_insn(ctx, revbd, dst, dst);
			break;
		}
		break;
	/* PC += off if dst cond src */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
		jmp_offset = bpf2la_offset(i, off, ctx);
		move_reg(ctx, t1, dst);
		move_reg(ctx, t2, src);
		if (is_signed_bpf_cond(BPF_OP(code))) {
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, t2, is32);
		} else {
			emit_zext_32(ctx, t1, is32);
			emit_zext_32(ctx, t2, is32);
		}
		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
			goto toofar;
		break;
	/* PC += off if dst cond imm */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		jmp_offset = bpf2la_offset(i, off, ctx);
		if (imm) {
			move_imm(ctx, t1, imm, false);
			tm = t1;
		} else {
			/* If imm is 0, simply use zero register. */
			tm = LOONGARCH_GPR_ZERO;
		}
		move_reg(ctx, t2, dst);
		if (is_signed_bpf_cond(BPF_OP(code))) {
			emit_sext_32(ctx, tm, is32);
			emit_sext_32(ctx, t2, is32);
		} else {
			emit_zext_32(ctx, tm, is32);
			emit_zext_32(ctx, t2, is32);
		}
		if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
			goto toofar;
		break;
	/* PC += off if dst & src */
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		jmp_offset = bpf2la_offset(i, off, ctx);
		emit_insn(ctx, and, t1, dst, src);
		emit_zext_32(ctx, t1, is32);
		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst & imm */
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		jmp_offset = bpf2la_offset(i, off, ctx);
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, and, t1, dst, t1);
		emit_zext_32(ctx, t1, is32);
		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
			goto toofar;
		break;
	/* PC += off */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
		if (BPF_CLASS(code) == BPF_JMP)
			jmp_offset = bpf2la_offset(i, off, ctx);
		else
			jmp_offset = bpf2la_offset(i, imm, ctx);
		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
			goto toofar;
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
		mark_call(ctx);
		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &func_addr, &func_addr_fixed);
		if (ret < 0)
			return ret;

		move_addr(ctx, t1, func_addr);
		emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
		move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
		break;

	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		mark_tail_call(ctx);
		if (emit_bpf_tail_call(ctx) < 0)
			return -EINVAL;
		break;
	/* function return */
	case BPF_JMP | BPF_EXIT:
		if (i == ctx->prog->len - 1)
			break;

		jmp_offset = epilogue_offset(ctx);
		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
			goto toofar;
		break;

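	/*
	 * Note: BPF_LD | BPF_IMM | BPF_DW occupies two 8-byte BPF slots; the
	 * "return 1" below tells build_body() to step over the second slot.
	 */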
	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
		move_imm(ctx, dst, imm64, is32);
		return 1;
	/* dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
	/* dst_reg = (s64)*(signed size *)(src_reg + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
		sign_extend = BPF_MODE(insn->code) == BPF_MEMSX ||
			      BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldb, dst, src, off);
				else
					emit_insn(ctx, ldbu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxb, dst, src, t1);
				else
					emit_insn(ctx, ldxbu, dst, src, t1);
			}
			break;
		case BPF_H:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldh, dst, src, off);
				else
					emit_insn(ctx, ldhu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxh, dst, src, t1);
				else
					emit_insn(ctx, ldxhu, dst, src, t1);
			}
			break;
		case BPF_W:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldw, dst, src, off);
				else
					emit_insn(ctx, ldwu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxw, dst, src, t1);
				else
					emit_insn(ctx, ldxwu, dst, src, t1);
			}
			break;
		case BPF_DW:
			move_imm(ctx, t1, off, is32);
			emit_insn(ctx, ldxd, dst, src, t1);
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;
	/* *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stb, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxb, t1, dst, t2);
			}
			break;
		case BPF_H:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, sth, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxh, t1, dst, t2);
			}
			break;
		case BPF_W:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stw, t1, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrw, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxw, t1, dst, t2);
			}
			break;
		case BPF_DW:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, std, t1, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrd, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxd, t1, dst, t2);
			}
			break;
		}
		break;
	/* *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stb, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxb, src, dst, t1);
			}
			break;
		case BPF_H:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, sth, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxh, src, dst, t1);
			}
			break;
		case BPF_W:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stw, src, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrw, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxw, src, dst, t1);
			}
			break;
		case BPF_DW:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, std, src, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrd, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxd, src, dst, t1);
			}
			break;
		}
		break;
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		emit_atomic(insn, ctx);
		break;

	/* Speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	default:
		pr_err("bpf_jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;

toofar:
	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
	return -E2BIG;
}
static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
	int i;
	const struct bpf_prog *prog = ctx->prog;

	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		if (ctx->image == NULL)
			ctx->offset[i] = ctx->idx;

		ret = build_insn(insn, ctx, extra_pass);
		if (ret > 0) {
			i++;
			if (ctx->image == NULL)
				ctx->offset[i] = ctx->idx;
			continue;
		}
		if (ret)
			return ret;
	}

	if (ctx->image == NULL)
		ctx->offset[i] = ctx->idx;

	return 0;
}
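/*
 * Note: ctx->offset[] has prog->len + 1 entries: entry i is the index of the
 * first JITed instruction emitted for BPF insn i, and the final entry marks
 * the end of the body; bpf2la_offset() uses these to translate BPF branch
 * offsets into JITed-instruction offsets.
 */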
/* Fill space with break instructions */
static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;

	/* We are guaranteed to have aligned memory */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = INSN_BREAK;
}
static int validate_code(struct jit_ctx *ctx)
{
	int i;
	union loongarch_instruction insn;

	for (i = 0; i < ctx->idx; i++) {
		insn = ctx->image[i];
		/* Check INSN_BREAK */
		if (insn.word == INSN_BREAK)
			return -1;
	}

	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
		return -1;

	return 0;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	bool tmp_blinded = false, extra_pass = false;
	u8 *image_ptr;
	int image_size, prog_size, extable_size;
	struct jit_ctx ctx;
	struct jit_data *jit_data;
	struct bpf_binary_header *header;
	struct bpf_prog *tmp, *orig_prog = prog;

	/*
	 * If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	if (IS_ERR(tmp))
		return orig_prog;

	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}
	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		image_ptr = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_offset;
	}
	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
	build_prologue(&ctx);
	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_offset;
	}
	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);

	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);

	/*
	 * Now we know the actual image size.
	 * As each LoongArch instruction is 32 bits long, we translate
	 * the number of JITed instructions into the size required to
	 * store the JITed code.
	 */
	prog_size = sizeof(u32) * ctx.idx;
	image_size = prog_size + extable_size;
	/* Now we know the size of the structure to make */
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL) {
		prog = orig_prog;
		goto out_offset;
	}
	/* 2. Now, the actual pass to generate final JIT code */
	ctx.image = (union loongarch_instruction *)image_ptr;
	if (extable_size)
		prog->aux->extable = (void *)image_ptr + prog_size;

skip_init_ctx:
	ctx.idx = 0;
	ctx.num_exentries = 0;

	build_prologue(&ctx);
	if (build_body(&ctx, extra_pass)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_offset;
	}
	build_epilogue(&ctx);

	/* 3. Extra pass to validate JITed code */
	if (validate_code(&ctx)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_offset;
	}

	/* And we're done */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	/* Update the icache */
	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
	if (!prog->is_func || extra_pass) {
		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d != %d\n",
				    ctx.idx, jit_data->ctx.idx);
			bpf_jit_binary_free(header);
			prog->bpf_func = NULL;
			prog->jited = 0;
			prog->jited_len = 0;
			goto out_offset;
		}
		bpf_jit_binary_lock_ro(header);
	} else {
		jit_data->ctx = ctx;
		jit_data->image = image_ptr;
		jit_data->header = header;
	}
	prog->jited = 1;
	prog->jited_len = prog_size;
	prog->bpf_func = (void *)ctx.image;
	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
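		/*
		 * Note (informal): ctx.offset + 1 is passed so that, for BPF
		 * insn i, the helper sees the byte offset at which insn i + 1
		 * begins, i.e. where insn i ends, which is the form
		 * bpf_prog_fill_jited_linfo() expects.
		 */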
out_offset:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}

out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);

	out_offset = -1;

	return prog;
}
/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}