/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>

#include "bpf_jit64.h"
static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}
static inline void bpf_flush_icache(void *start, void *end)
{
	smp_wmb();
	flush_icache_range((unsigned long)start, (unsigned long)end);
}
static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
{
	return (ctx->seen & (1 << (31 - b2p[i])));
}
static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
{
	ctx->seen |= (1 << (31 - b2p[i]));
}
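/*
 * Example (a sketch, assuming the b2p[] mapping from bpf_jit64.h where
 * BPF_REG_6 maps to GPR r27): seeing BPF_REG_6 sets bit 1 << (31 - 27)
 * = 0x10 in ctx->seen, i.e. the bitmap is keyed by the PPC register
 * number rather than the BPF register number.
 */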
static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}
/*
 * When not setting up our own stackframe, the redzone usage is:
 *
 *		[	prev sp		] <-------------
 *		[	  ...		]		|
 * sp (r1) --->	[    stack pointer	] --------------
 *		[   nv gpr save area	] 8*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
 */
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
	if (bpf_has_stack_frame(ctx))
		return STACK_FRAME_MIN_SIZE + MAX_BPF_STACK;
	else
		return -(BPF_PPC_STACK_SAVE + 16);
}
static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
	return bpf_jit_stack_local(ctx) + 8;
}
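/*
 * Worked example (assuming BPF_PPC_STACK_SAVE is 8 * 8 = 64 bytes, as in
 * bpf_jit64.h): without a stack frame, the local temp var sits in the
 * redzone at r1 - 80 and tail_call_cnt at r1 - 72, matching the
 * -(BPF_PPC_STACK_SAVE + 8) slot the prologue writes below.
 */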
static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
		return (bpf_has_stack_frame(ctx) ? BPF_PPC_STACKFRAME : 0)
							- (8 * (32 - reg));

	pr_err("BPF JIT is asking about unknown registers");
	BUG();
}
static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
{
	/*
	 * Load skb->len and skb->data_len
	 * r3 points to skb
	 */
	PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
	PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
	/* header_len = len - data_len */
	PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);

	/* skb->data pointer */
	PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
}
static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/*
	 * Initialize tail_call_cnt if we do tail calls.
	 * Otherwise, put in NOPs so that it can be skipped when we are
	 * invoked through a tail call.
	 */
	if (ctx->seen & SEEN_TAILCALL) {
		PPC_LI(b2p[TMP_REG_1], 0);
		/* this goes in the redzone */
		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
	} else {
		PPC_NOP();
		PPC_NOP();
	}
#define BPF_TAILCALL_PROLOGUE_SIZE	8
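	/*
	 * Note: the value 8 reflects the two 4-byte instructions emitted
	 * above (li + std when tail calls are seen, or the two nops); a
	 * tail call branches past them so tail_call_cnt is not re-zeroed.
	 */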
	if (bpf_has_stack_frame(ctx)) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
	}
	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
	/*
	 * Save additional non-volatile regs if we cache skb
	 * Also, setup skb data
	 */
	if (ctx->seen & SEEN_SKB) {
		PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
		PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
		bpf_jit_emit_skb_loads(image, ctx);
	}
	/* Setup frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, BPF_REG_FP))
		PPC_ADDI(b2p[BPF_REG_FP], 1,
				STACK_FRAME_MIN_SIZE + MAX_BPF_STACK);
}
static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
	/* Restore non-volatile registers used for skb cache */
	if (ctx->seen & SEEN_SKB) {
		PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
		PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
	}
	/* Tear down our stack frame */
	if (bpf_has_stack_frame(ctx)) {
		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			PPC_MTLR(0);
		}
	}
}
static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	bpf_jit_emit_common_epilogue(image, ctx);

	/* Move result to r3 */
	PPC_MR(3, b2p[BPF_REG_0]);
	PPC_BLR();
}
static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
{
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but this may optimize the number of
	 * instructions required based on the nature of the address.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		PPC_NOP();
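	/*
	 * Illustrative sketch (not the exact codegen): for a worst-case
	 * 64-bit address, PPC_LI64(12, func) expands to five instructions,
	 * roughly:
	 *	lis   r12, imm[63:48]
	 *	ori   r12, r12, imm[47:32]
	 *	sldi  r12, r12, 32
	 *	oris  r12, r12, imm[31:16]
	 *	ori   r12, r12, imm[15:0]
	 * while "nicer" addresses need fewer; the loop above then pads the
	 * sequence to five with nops so its length is address-independent.
	 */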
#ifdef PPC64_ELF_ABI_v1
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourself.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	PPC_MTLR(12);
	PPC_BLRL();
}
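/*
 * Background (a sketch; the ABI also defines a third, environment
 * doubleword): an ELFv1 function symbol points to a descriptor of
 * roughly the form
 *	struct func_desc { u64 entry; u64 toc; };
 * so the two loads above fetch the TOC into r2 and the real entry
 * point into r12 before the indirect call.
 */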
static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * By now, the eBPF program has already set up parameters in r3, r4 and r5
	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
	 * r4/BPF_REG_2 - pointer to bpf_array
	 * r5/BPF_REG_3 - index in bpf_array
	 */
	int b2p_bpf_array = b2p[BPF_REG_2];
	int b2p_index = b2p[BPF_REG_3];
	/*
	 * if (index >= array->map.max_entries)
	 *   goto out;
	 */
	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
	PPC_BCC(COND_GE, out);
	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *   goto out;
	 */
	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
	PPC_BCC(COND_GT, out);

	/*
	 * tail_call_cnt++;
	 */
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	/* prog = array->ptrs[index]; */
	PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
	PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *   goto out;
	 */
	PPC_CMPLDI(b2p[TMP_REG_1], 0);
	PPC_BCC(COND_EQ, out);
	/* goto *(prog->bpf_func + prologue_size); */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* skip past the function descriptor */
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
#else
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
#endif
	PPC_MTCTR(b2p[TMP_REG_1]);
	/* tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	PPC_BCTR();

	/* out: */
	return 0;
}
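/*
 * In effect, the code emitted above is the following sketch in C-like
 * pseudocode (MAX abbreviates MAX_TAIL_CALL_CNT):
 *
 *	if (index >= array->map.max_entries || tail_call_cnt > MAX)
 *		goto out;
 *	tail_call_cnt++;
 *	prog = array->ptrs[index];
 *	if (!prog)
 *		goto out;
 *	goto *(prog->bpf_func + BPF_TAILCALL_PROLOGUE_SIZE);
 *
 * The CTR-based jump skips the callee's tail_call_cnt setup, so the
 * counter accumulates across chained tail calls.
 */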
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];
	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		u64 imm64;
		u8 *func;
		u32 true_cond;
		u32 tmp_idx;
		int ret;
		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;
		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
			bpf_set_seen_register(ctx, insn[i].dst_reg);
		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
			bpf_set_seen_register(ctx, insn[i].src_reg);
		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			PPC_ADD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			PPC_SUB(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
			if (!imm) {
				goto bpf_alu32_trunc;
			} else if (imm >= -32768 && imm < 32768) {
				PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
			} else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (!imm) {
				goto bpf_alu32_trunc;
			} else if (imm > -32768 && imm <= 32768) {
				PPC_ADDI(dst_reg, dst_reg, IMM_L(-imm));
			} else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				PPC_MULW(dst_reg, dst_reg, src_reg);
			else
				PPC_MULD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					PPC_MULW(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				else
					PPC_MULD(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			PPC_CMPWI(src_reg, 0);
			PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
			PPC_LI(b2p[BPF_REG_0], 0);
			PPC_JMP(exit_addr);
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULW(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVWU(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
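			/*
			 * Worked example for the BPF_MOD path above: with
			 * dst = 17 and src = 5, divwu yields 3, mullw yields
			 * 15, and the subtract leaves dst = 2, computing
			 * dst % src as dst - (dst / src) * src.
			 */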
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			PPC_CMPDI(src_reg, 0);
			PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
			PPC_LI(b2p[BPF_REG_0], 0);
			PPC_JMP(exit_addr);
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULD(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVDU(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;

			if (imm == 1) {
				if (BPF_OP(code) == BPF_DIV) {
					goto bpf_alu32_trunc;
				} else {
					PPC_LI(dst_reg, 0);
					break;
				}
			}

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVWU(b2p[TMP_REG_2], dst_reg, b2p[TMP_REG_1]);
					PPC_MULW(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
				} else
					PPC_DIVWU(dst_reg, dst_reg, b2p[TMP_REG_1]);
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVDU(b2p[TMP_REG_2], dst_reg, b2p[TMP_REG_1]);
					PPC_MULD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
				} else
					PPC_DIVDU(dst_reg, dst_reg, b2p[TMP_REG_1]);
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			PPC_NEG(dst_reg, dst_reg);
			goto bpf_alu32_trunc;
		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			PPC_AND(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			PPC_OR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			PPC_XOR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			PPC_SLW(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
			PPC_SLD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			PPC_SLWI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				PPC_SLDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			PPC_SRW(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			PPC_SRD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			PPC_SRWI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				PPC_SRDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			PPC_SRAD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				PPC_SRADI(dst_reg, dst_reg, imm);
			break;
		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			PPC_MR(dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			goto bpf_alu32_trunc;
bpf_alu32_trunc:
		/* Truncate to 32-bits */
		if (BPF_CLASS(code) == BPF_ALU)
			PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
		break;
		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
				/* Rotate 8 bits right & insert LSB to reg */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
				/* Move result back to dst_reg */
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
				/* Rotate 24 bits and insert byte 1 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
				/* Rotate 24 bits and insert byte 3 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
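			/*
			 * Worked example for the 32-bit swap above, with
			 * dst = 0x11223344: the rotate-left-8 yields
			 * 0x22334411 (bytes 2 and 4 final), the first rlwimi
			 * inserts byte 1 giving 0x44334411, and the second
			 * inserts byte 3, producing 0x44332211.
			 */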
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into stack and then use ldbrx
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
				PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
				PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 48);
				break;
			case 32:
				/* zero-extend 32 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 32);
				break;
			case 64:
				/* nop */
				break;
			}
			break;
		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STB(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STH(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STW(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_BPF_STL(src_reg, dst_reg, off);
			break;
		/*
		 * BPF_STX XADD (atomic_add)
		 */
		/* *(u32 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Get EA into TMP_REG_1 */
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			tmp_idx = ctx->idx * 4;
			/* load value from memory into TMP_REG_2 */
			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			/* add value from src_reg into this */
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			/* store result back */
			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			/* we're done if this succeeded */
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
		/* *(u64 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_DW:
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			tmp_idx = ctx->idx * 4;
			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
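		/*
		 * Both XADD variants rely on the usual larx/stcx. pattern:
		 * the store-conditional fails (CR0 EQ clear) if the
		 * reservation obtained by the load-reserve was lost, and the
		 * COND_NE branch back to tmp_idx retries the load/add/store
		 * sequence until it completes atomically.
		 */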
		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			PPC_LBZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			PPC_LHZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			PPC_LWZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_BPF_LL(dst_reg, src_reg, off);
			break;
		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				    (((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;
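			/*
			 * Example: for a BPF_LD_IMM64 loading
			 * 0x1122334455667788, insn[i].imm holds the low word
			 * 0x55667788 and insn[i+1].imm the high word
			 * 0x11223344; the OR above reassembles the full
			 * 64-bit constant.
			 */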
		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;
		/*
		 * Call kernel helper
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;
			func = (u8 *) __bpf_call_base + imm;

			/* Save skb pointer if we need to re-cache skb data */
			if ((ctx->seen & SEEN_SKB) &&
			    bpf_helper_changes_pkt_data(func))
				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));

			bpf_jit_emit_func_call(image, ctx, (u64)func);

			/* move return value from r3 to BPF_REG_0 */
			PPC_MR(b2p[BPF_REG_0], 3);

			/* refresh skb cache */
			if ((ctx->seen & SEEN_SKB) &&
			    bpf_helper_changes_pkt_data(func)) {
				/* reload skb pointer to r3 */
				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
				bpf_jit_emit_skb_loads(image, ctx);
			}
			break;
		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_X:
			true_cond = COND_LT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_X:
			true_cond = COND_LE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JLT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JLE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
				/* unsigned comparison */
				PPC_CMPLD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSLT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
			case BPF_JMP | BPF_JSLE | BPF_X:
				/* signed comparison */
				PPC_CMPD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
				PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JLT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
			case BPF_JMP | BPF_JLE | BPF_K:
				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768)
					PPC_CMPLDI(dst_reg, imm);
				else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					PPC_CMPLD(dst_reg, b2p[TMP_REG_1]);
				}
				break;
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSLT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
			case BPF_JMP | BPF_JSLE | BPF_K:
				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768)
					PPC_CMPDI(dst_reg, imm);
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_CMPD(dst_reg, b2p[TMP_REG_1]);
				}
				break;
			case BPF_JMP | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
						    b2p[TMP_REG_1]);
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;
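			/*
			 * Example: BPF_JMP | BPF_JGT | BPF_X compares as
			 * unsigned 64-bit via cmpld and branches on COND_GT,
			 * while the BPF_JSGT variant uses the signed cmpd;
			 * both share this single PPC_BCC() to the target.
			 */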
		/*
		 * Loads from packet header/data
		 * Assume 32-bit input value in imm and X (src_reg)
		 */

		/* Absolute loads */
		case BPF_LD | BPF_W | BPF_ABS:
			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
			goto common_load_abs;
		case BPF_LD | BPF_H | BPF_ABS:
			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
			goto common_load_abs;
		case BPF_LD | BPF_B | BPF_ABS:
			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
common_load_abs:
			/*
			 * Load from [imm]
			 * Load into r4, which can just be passed onto
			 * skb load helpers as the second parameter
			 */
			PPC_LI32(4, imm);
			goto common_load;
		/* Indirect loads */
		case BPF_LD | BPF_W | BPF_IND:
			func = (u8 *)sk_load_word;
			goto common_load_ind;
		case BPF_LD | BPF_H | BPF_IND:
			func = (u8 *)sk_load_half;
			goto common_load_ind;
		case BPF_LD | BPF_B | BPF_IND:
			func = (u8 *)sk_load_byte;
common_load_ind:
			/*
			 * Load from [src_reg + imm]
			 * Treat src_reg as a 32-bit value
			 */
			PPC_EXTSW(4, src_reg);
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					PPC_ADDI(4, 4, IMM_L(imm));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_ADD(4, 4, b2p[TMP_REG_1]);
				}
			}
common_load:
			ctx->seen |= SEEN_SKB;
			ctx->seen |= SEEN_FUNC;
			bpf_jit_emit_func_call(image, ctx, (u64)func);

			/*
			 * Helper returns 'lt' condition on error, and an
			 * appropriate return value in BPF_REG_0
			 */
			PPC_BCC(COND_LT, exit_addr);
			break;
		/*
		 * Tail call
		 */
		case BPF_JMP | BPF_TAIL_CALL:
			ctx->seen |= SEEN_TAILCALL;
			ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
			if (ret < 0)
				return ret;
			break;
		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					   code, i);
			return -ENOTSUPP;
		}
	}
	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	if (!bpf_jit_enable)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;
	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}
	flen = fp->len;
	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out;
	}
	memset(&cgctx, 0, sizeof(struct codegen_context));

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out;
	}
	/*
	 * Pretend to build prologue, given the features we've seen. This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);
	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
			bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out;
	}
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}
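	/*
	 * As noted above, exit_addr (addrs[flen]) is only valid once a full
	 * body pass has run, so the second pass re-emits the code with the
	 * now-correct branch targets.
	 */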
	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);
#ifdef PPC64_ELF_ABI_v1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)code_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif
	fp->bpf_func = (void *)image;
	fp->jited = 1;
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
out:
	kfree(addrs);

	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}
/* Overriding bpf_jit_free() as we don't set images read-only. */
void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
	struct bpf_binary_header *bpf_hdr = (void *)addr;

	if (fp->jited)
		bpf_jit_binary_free(bpf_hdr);

	bpf_prog_unlock_free(fp);
}