2023-10-27

bpf

bpf 在内核运行的核心入口函数及其变形

bpf prog内核运行核心入口函数

总结：`___bpf_prog_run`

bpf 在内核运行的核心入口函数：___bpf_prog_run
___bpf_prog_run是bpf的核心函数入口，该函数被多个不同stack size的函数调用。
函数指针数组interpreters把上面的这些函数汇集到一起。
当bpf程序被加载到内核的时候，内核为它创建一个bpf_prog结构体，根据prog的stack size，选择interpreters里对应的
函数，并保存到bpf_prog里的bpf_func上。
这样后续hook点运行bpf_prog程序的时候，就使用bpf_func运行。

对应patch： https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=v6.6-rc2&id=b870aa901f4be

bpf: use different interpreter depending on required stack size

Author: Alexei Starovoitov <ast@kernel.org>
Date:   Tue May 30 13:31:33 2017 -0700

    bpf: use different interpreter depending on required stack size

    16 __bpf_prog_run() interpreters for various stack sizes add .text
    but not a lot comparing to run-time stack savings

       text    data     bss     dec     hex filename
      26350   10328     624   37302    91b6 kernel/bpf/core.o.before_split
      25777   10328     624   36729    8f79 kernel/bpf/core.o.after_split
      26970   10328     624   37922    9422 kernel/bpf/core.o.now

函数定义

先看下函数定义：

1
2
3

2049 EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
2050 EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
2051 EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);

展开宏`EVAL`：

EVAL宏定义：

1
2
3

2042 #define EVAL1(FN, X) FN(X)
2043 #define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
2051 EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);

为了便于理解，我们人肉展开宏后,效果相当于

DEFINE_BPF_PROG_RUN(416) DEFINE_BPF_PROG_RUN(448) DEFINE_BPF_PROG_RUN(480) DEFINE_BPF_PROG_RUN(512)

展开宏`DEFINE_BPF_PROG_RUN`:

2013 #define PROG_NAME(stack_size) __bpf_prog_run##stack_size
2014 #define DEFINE_BPF_PROG_RUN(stack_size) \
2015 static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
2016 { \
2017         u64 stack[stack_size / sizeof(u64)]; \
2018         u64 regs[MAX_BPF_EXT_REG] = {}; \
2019 \
2020         FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
2021         ARG1 = (u64) (unsigned long) ctx; \
2022         return ___bpf_prog_run(regs, insn); \
2023 }

DEFINE_BPF_PROG_RUN定义了一个以__bpf_prog_run开头、后面加上stack size的函数。
比如DEFINE_BPF_PROG_RUN(416)展开后

static unsigned int __bpf_prog_run416(const void *ctx, const struct bpf_insn *insn)

其中函数里面的FP和ARG1都是BPF机制里使用的寄存器

60 #define FP      regs[BPF_REG_FP]
61 #define AX      regs[BPF_REG_AX]
62 #define ARG1    regs[BPF_REG_ARG1]
63 #define CTX     regs[BPF_REG_CTX]

函数指针数组`interpreters`

函数指针数组interpreters

2057 #define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
2058
2059 static unsigned int (*interpreters[])(const void *ctx,
2060                                       const struct bpf_insn *insn) = {
2061 EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
2062 EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
2063 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
2064 };
2065 #undef PROG_NAME_LIST

展开EVAL宏：

EVAL宏定义：

2042 #define EVAL1(FN, X) FN(X)
2043 #define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)

以EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) 为例，
展开宏后：

PROG_NAME_LIST(416) PROG_NAME_LIST(448) PROG_NAME_LIST(480) PROG_NAME_LIST(512)

展开`PROG_NAME_LIST`宏：

2057 #define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
2013 #define PROG_NAME(stack_size) __bpf_prog_run##stack_size

1
2
3

PROG_NAME_LIST(416) 
 ==> 展开：PROG_NAME(416),
 ==> ==> 再展开：__bpf_prog_run416,

相比之下，老代码更直观且容易理解

为了便于理解，先对比一下老代码：commit（b870aa901f4be）之前的代码是这样的。
在bpf_prog_select_runtime函数中，直接把bpf_func赋值为__bpf_prog_run

struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
     fp->bpf_func = (void *) __bpf_prog_run;


     /* eBPF JITs can rewrite the program in case constant
      * blinding is active. However, in case of error during
      * blinding, bpf_int_jit_compile() must always return a
      * valid program, which in this case would simply not
      * be JITed, but falls back to the interpreter.
      */
     fp = bpf_int_jit_compile(fp);
     bpf_prog_lock_ro(fp);


     /* The tail call compatibility check can only be done at
      * this late stage as we need to determine, if we deal
      * with JITed or non JITed program concatenations and not
      * all eBPF JITs might immediately support all features.
      */
     *err = bpf_check_tail_call(fp);


     return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);

static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 {
     u64 stack[MAX_BPF_STACK / sizeof(u64)];
     u64 regs[MAX_BPF_REG];


     FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
     ARG1 = (u64) (unsigned long) ctx;
     return ___bpf_prog_run(regs, insn, stack);
 }

BPF如何调用

初始化bpf_func

当bpf程序被加载到内核的时候，内核为它创建一个bpf_prog结构体，根据prog的stack size，选择interpreters里对应的
函数，并把选中的函数地址保存到bpf_prog里的bpf_func这个函数指针上。
这样后续hook点运行bpf_prog程序的时候，就使用bpf_func运行。
例如stack_depth为100时，round_up(100, 32) = 128，数组下标为128 / 32 - 1 = 3，即选中__bpf_prog_run128。

2550 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
2551 {
...
2711         prog = bpf_prog_select_runtime(prog, &err);

1
2
3

2179 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
...
2193         bpf_prog_select_func(fp);

2157 static void bpf_prog_select_func(struct bpf_prog *fp)
2158 {
2159 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
2160         u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
2161
2162         fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
2163 #else
2164         fp->bpf_func = __bpf_prog_ret0_warn;
2165 #endif
2166 }

bpf prog内核运行核心入口函数

总结：___bpf_prog_run

函数定义

先看下函数定义：

展开宏EVAL：

展开宏DEFINE_BPF_PROG_RUN:

函数指针数组interpreters

函数指针数组interpreters

展开EVAL宏：

展开PROG_NAME_LIST宏：

相比之下，老代码更直观且容易理解

BPF如何调用

初始化bpf_func

总结：`___bpf_prog_run`

展开宏`EVAL`：

展开宏`DEFINE_BPF_PROG_RUN`:

函数指针数组`interpreters`

展开`PROG_NAME_LIST`宏：