diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-06 09:38:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-06 09:38:14 -0700 |
commit | ae045e2455429c418a418a3376301a9e5753a0a8 (patch) | |
tree | b445bdeecd3f38aa0d0a29c9585cee49e4ccb0f1 /kernel | |
parent | f4f142ed4ef835709c7e6d12eaca10d190bcebed (diff) | |
parent | d247b6ab3ce6dd43665780865ec5fa145d9ab6bd (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Steady transitioning of the BPF instructure to a generic spot so
all kernel subsystems can make use of it, from Alexei Starovoitov.
2) SFC driver supports busy polling, from Alexandre Rames.
3) Take advantage of hash table in UDP multicast delivery, from David
Held.
4) Lighten locking, in particular by getting rid of the LRU lists, in
inet frag handling. From Florian Westphal.
5) Add support for various RFC6458 control messages in SCTP, from
Geir Ola Vaagland.
6) Allow to filter bridge forwarding database dumps by device, from
Jamal Hadi Salim.
7) virtio-net also now supports busy polling, from Jason Wang.
8) Some low level optimization tweaks in pktgen from Jesper Dangaard
Brouer.
9) Add support for ipv6 address generation modes, so that userland
can have some input into the process. From Jiri Pirko.
10) Consolidate common TCP connection request code in ipv4 and ipv6,
from Octavian Purdila.
11) New ARP packet logger in netfilter, from Pablo Neira Ayuso.
12) Generic resizable RCU hash table, with intial users in netlink and
nftables. From Thomas Graf.
13) Maintain a name assignment type so that userspace can see where a
network device name came from (enumerated by kernel, assigned
explicitly by userspace, etc.) From Tom Gundersen.
14) Automatic flow label generation on transmit in ipv6, from Tom
Herbert.
15) New packet timestamping facilities from Willem de Bruijn, meant to
assist in measuring latencies going into/out-of the packet
scheduler, latency from TCP data transmission to ACK, etc"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1536 commits)
cxgb4 : Disable recursive mailbox commands when enabling vi
net: reduce USB network driver config options.
tg3: Modify tg3_tso_bug() to handle multiple TX rings
amd-xgbe: Perform phy connect/disconnect at dev open/stop
amd-xgbe: Use dma_set_mask_and_coherent to set DMA mask
net: sun4i-emac: fix memory leak on bad packet
sctp: fix possible seqlock seadlock in sctp_packet_transmit()
Revert "net: phy: Set the driver when registering an MDIO bus device"
cxgb4vf: Turn off SGE RX/TX Callback Timers and interrupts in PCI shutdown routine
team: Simplify return path of team_newlink
bridge: Update outdated comment on promiscuous mode
net-timestamp: ACK timestamp for bytestreams
net-timestamp: TCP timestamping
net-timestamp: SCHED timestamp on entering packet scheduler
net-timestamp: add key to disambiguate concurrent datagrams
net-timestamp: move timestamp flags out of sk_flags
net-timestamp: extend SCM_TIMESTAMPING ancillary data struct
cxgb4i : Move stray CPL definitions to cxgb4 driver
tcp: reduce spurious retransmits due to transient SACK reneging
qlcnic: Initialize dcbnl_ops before register_netdev
...
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/bpf/Makefile | 1 | ||||
-rw-r--r-- | kernel/bpf/core.c | 534 | ||||
-rw-r--r-- | kernel/seccomp.c | 20 | ||||
-rw-r--r-- | kernel/sysctl_binary.c | 1 |
5 files changed, 547 insertions, 10 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 973a40cf8068..0026cf531769 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o +obj-$(CONFIG_NET) += bpf/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile new file mode 100644 index 000000000000..6a71145e2769 --- /dev/null +++ b/kernel/bpf/Makefile @@ -0,0 +1 @@ +obj-y := core.o diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c new file mode 100644 index 000000000000..7f0dbcbb34af --- /dev/null +++ b/kernel/bpf/core.c @@ -0,0 +1,534 @@ +/* + * Linux Socket Filter - Kernel level socket filtering + * + * Based on the design of the Berkeley Packet Filter. The new + * internal format has been designed by PLUMgrid: + * + * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com + * + * Authors: + * + * Jay Schulist <jschlst@samba.org> + * Alexei Starovoitov <ast@plumgrid.com> + * Daniel Borkmann <dborkman@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Andi Kleen - Fix a few bad bugs and races. + * Kris Katterjohn - Added many additional checks in bpf_check_classic() + */ +#include <linux/filter.h> +#include <linux/skbuff.h> +#include <asm/unaligned.h> + +/* Registers */ +#define BPF_R0 regs[BPF_REG_0] +#define BPF_R1 regs[BPF_REG_1] +#define BPF_R2 regs[BPF_REG_2] +#define BPF_R3 regs[BPF_REG_3] +#define BPF_R4 regs[BPF_REG_4] +#define BPF_R5 regs[BPF_REG_5] +#define BPF_R6 regs[BPF_REG_6] +#define BPF_R7 regs[BPF_REG_7] +#define BPF_R8 regs[BPF_REG_8] +#define BPF_R9 regs[BPF_REG_9] +#define BPF_R10 regs[BPF_REG_10] + +/* Named registers */ +#define DST regs[insn->dst_reg] +#define SRC regs[insn->src_reg] +#define FP regs[BPF_REG_FP] +#define ARG1 regs[BPF_REG_ARG1] +#define CTX regs[BPF_REG_CTX] +#define IMM insn->imm + +/* No hurry in this branch + * + * Exported for the bpf jit load helper. + */ +void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size) +{ + u8 *ptr = NULL; + + if (k >= SKF_NET_OFF) + ptr = skb_network_header(skb) + k - SKF_NET_OFF; + else if (k >= SKF_LL_OFF) + ptr = skb_mac_header(skb) + k - SKF_LL_OFF; + if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) + return ptr; + + return NULL; +} + +/* Base function for offset calculation. Needs to go into .text section, + * therefore keeping it non-static as well; will also be used by JITs + * anyway later on, so do not let the compiler omit it. + */ +noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return 0; +} + +/** + * __bpf_prog_run - run eBPF program on a given context + * @ctx: is the data we are operating on + * @insn: is the array of eBPF instructions + * + * Decode and execute eBPF instructions. + */ +static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) +{ + u64 stack[MAX_BPF_STACK / sizeof(u64)]; + u64 regs[MAX_BPF_REG], tmp; + static const void *jumptable[256] = { + [0 ... 255] = &&default_label, + /* Now overwrite non-defaults ... */ + /* 32 bit ALU operations */ + [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, + [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, + [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, + [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K, + [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, + [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, + [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, + [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, + [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, + [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, + [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, + [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, + [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, + [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, + [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, + [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, + [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, + [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, + [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, + [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, + [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, + [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, + [BPF_ALU | BPF_NEG] = &&ALU_NEG, + [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, + [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, + /* 64 bit ALU operations */ + [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, + [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, + [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, + [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, + [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, + [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, + [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, + [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, + [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, + [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, + [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, + [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, + [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, + [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, + [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, + [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, + [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, + [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, + [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, + [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, + [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, + [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, + [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, + [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, + [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, + /* Call instruction */ + [BPF_JMP | BPF_CALL] = &&JMP_CALL, + /* Jumps */ + [BPF_JMP | BPF_JA] = &&JMP_JA, + [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, + [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, + [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, + [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, + [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, + [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, + [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, + [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, + [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, + [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, + [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, + [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, + [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, + [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, + /* Program return */ + [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, + /* Store instructions */ + [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, + [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, + [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W, + [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, + [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, + [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, + [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B, + [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, + [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, + [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, + /* Load instructions */ + [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B, + [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, + [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, + [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, + [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W, + [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H, + [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B, + [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, + [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, + [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, + }; + void *ptr; + int off; + +#define CONT ({ insn++; goto select_insn; }) +#define CONT_JMP ({ insn++; goto select_insn; }) + + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; + ARG1 = (u64) (unsigned long) ctx; + + /* Registers used in classic BPF programs need to be reset first. */ + regs[BPF_REG_A] = 0; + regs[BPF_REG_X] = 0; + +select_insn: + goto *jumptable[insn->code]; + + /* ALU */ +#define ALU(OPCODE, OP) \ + ALU64_##OPCODE##_X: \ + DST = DST OP SRC; \ + CONT; \ + ALU_##OPCODE##_X: \ + DST = (u32) DST OP (u32) SRC; \ + CONT; \ + ALU64_##OPCODE##_K: \ + DST = DST OP IMM; \ + CONT; \ + ALU_##OPCODE##_K: \ + DST = (u32) DST OP (u32) IMM; \ + CONT; + + ALU(ADD, +) + ALU(SUB, -) + ALU(AND, &) + ALU(OR, |) + ALU(LSH, <<) + ALU(RSH, >>) + ALU(XOR, ^) + ALU(MUL, *) +#undef ALU + ALU_NEG: + DST = (u32) -DST; + CONT; + ALU64_NEG: + DST = -DST; + CONT; + ALU_MOV_X: + DST = (u32) SRC; + CONT; + ALU_MOV_K: + DST = (u32) IMM; + CONT; + ALU64_MOV_X: + DST = SRC; + CONT; + ALU64_MOV_K: + DST = IMM; + CONT; + ALU64_ARSH_X: + (*(s64 *) &DST) >>= SRC; + CONT; + ALU64_ARSH_K: + (*(s64 *) &DST) >>= IMM; + CONT; + ALU64_MOD_X: + if (unlikely(SRC == 0)) + return 0; + tmp = DST; + DST = do_div(tmp, SRC); + CONT; + ALU_MOD_X: + if (unlikely(SRC == 0)) + return 0; + tmp = (u32) DST; + DST = do_div(tmp, (u32) SRC); + CONT; + ALU64_MOD_K: + tmp = DST; + DST = do_div(tmp, IMM); + CONT; + ALU_MOD_K: + tmp = (u32) DST; + DST = do_div(tmp, (u32) IMM); + CONT; + ALU64_DIV_X: + if (unlikely(SRC == 0)) + return 0; + do_div(DST, SRC); + CONT; + ALU_DIV_X: + if (unlikely(SRC == 0)) + return 0; + tmp = (u32) DST; + do_div(tmp, (u32) SRC); + DST = (u32) tmp; + CONT; + ALU64_DIV_K: + do_div(DST, IMM); + CONT; + ALU_DIV_K: + tmp = (u32) DST; + do_div(tmp, (u32) IMM); + DST = (u32) tmp; + CONT; + ALU_END_TO_BE: + switch (IMM) { + case 16: + DST = (__force u16) cpu_to_be16(DST); + break; + case 32: + DST = (__force u32) cpu_to_be32(DST); + break; + case 64: + DST = (__force u64) cpu_to_be64(DST); + break; + } + CONT; + ALU_END_TO_LE: + switch (IMM) { + case 16: + DST = (__force u16) cpu_to_le16(DST); + break; + case 32: + DST = (__force u32) cpu_to_le32(DST); + break; + case 64: + DST = (__force u64) cpu_to_le64(DST); + break; + } + CONT; + + /* CALL */ + JMP_CALL: + /* Function call scratches BPF_R1-BPF_R5 registers, + * preserves BPF_R6-BPF_R9, and stores return value + * into BPF_R0. + */ + BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3, + BPF_R4, BPF_R5); + CONT; + + /* JMP */ + JMP_JA: + insn += insn->off; + CONT; + JMP_JEQ_X: + if (DST == SRC) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JEQ_K: + if (DST == IMM) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JNE_X: + if (DST != SRC) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JNE_K: + if (DST != IMM) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JGT_X: + if (DST > SRC) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JGT_K: + if (DST > IMM) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JGE_X: + if (DST >= SRC) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JGE_K: + if (DST >= IMM) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSGT_X: + if (((s64) DST) > ((s64) SRC)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSGT_K: + if (((s64) DST) > ((s64) IMM)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSGE_X: + if (((s64) DST) >= ((s64) SRC)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSGE_K: + if (((s64) DST) >= ((s64) IMM)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSET_X: + if (DST & SRC) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSET_K: + if (DST & IMM) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_EXIT: + return BPF_R0; + + /* STX and ST and LDX*/ +#define LDST(SIZEOP, SIZE) \ + STX_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ + CONT; \ + ST_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \ + CONT; \ + LDX_MEM_##SIZEOP: \ + DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ + CONT; + + LDST(B, u8) + LDST(H, u16) + LDST(W, u32) + LDST(DW, u64) +#undef LDST + STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ + atomic_add((u32) SRC, (atomic_t *)(unsigned long) + (DST + insn->off)); + CONT; + STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ + atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) + (DST + insn->off)); + CONT; + LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */ + off = IMM; +load_word: + /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are + * only appearing in the programs where ctx == + * skb. All programs keep 'ctx' in regs[BPF_REG_CTX] + * == BPF_R6, bpf_convert_filter() saves it in BPF_R6, + * internal BPF verifier will check that BPF_R6 == + * ctx. + * + * BPF_ABS and BPF_IND are wrappers of function calls, + * so they scratch BPF_R1-BPF_R5 registers, preserve + * BPF_R6-BPF_R9, and store return value into BPF_R0. + * + * Implicit input: + * ctx == skb == BPF_R6 == CTX + * + * Explicit input: + * SRC == any register + * IMM == 32-bit immediate + * + * Output: + * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness + */ + + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); + if (likely(ptr != NULL)) { + BPF_R0 = get_unaligned_be32(ptr); + CONT; + } + + return 0; + LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ + off = IMM; +load_half: + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); + if (likely(ptr != NULL)) { + BPF_R0 = get_unaligned_be16(ptr); + CONT; + } + + return 0; + LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ + off = IMM; +load_byte: + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); + if (likely(ptr != NULL)) { + BPF_R0 = *(u8 *)ptr; + CONT; + } + + return 0; + LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */ + off = IMM + SRC; + goto load_word; + LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */ + off = IMM + SRC; + goto load_half; + LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */ + off = IMM + SRC; + goto load_byte; + + default_label: + /* If we ever reach this, we have a bug somewhere. */ + WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); + return 0; +} + +void __weak bpf_int_jit_compile(struct bpf_prog *prog) +{ +} + +/** + * bpf_prog_select_runtime - select execution runtime for BPF program + * @fp: bpf_prog populated with internal BPF program + * + * try to JIT internal BPF program, if JIT is not available select interpreter + * BPF program will be executed via BPF_PROG_RUN() macro + */ +void bpf_prog_select_runtime(struct bpf_prog *fp) +{ + fp->bpf_func = (void *) __bpf_prog_run; + + /* Probe if internal BPF can be JITed */ + bpf_int_jit_compile(fp); +} +EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); + +/* free internal BPF program */ +void bpf_prog_free(struct bpf_prog *fp) +{ + bpf_jit_free(fp); +} +EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 74f460179171..25b0043f4755 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -56,7 +56,7 @@ struct seccomp_filter { atomic_t usage; struct seccomp_filter *prev; - struct sk_filter *prog; + struct bpf_prog *prog; }; /* Limit any path through the tree to 256KB worth of instructions. */ @@ -89,7 +89,7 @@ static void populate_seccomp_data(struct seccomp_data *sd) * @filter: filter to verify * @flen: length of filter * - * Takes a previously checked filter (by sk_chk_filter) and + * Takes a previously checked filter (by bpf_check_classic) and * redirects all filter code that loads struct sk_buff data * and related data through seccomp_bpf_load. It also * enforces length and alignment checking of those loads. @@ -192,7 +192,7 @@ static u32 seccomp_run_filters(int syscall) * value always takes priority (ignoring the DATA). */ for (; f; f = f->prev) { - u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -374,7 +374,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) goto free_prog; /* Check and rewrite the fprog via the skb checker */ - ret = sk_chk_filter(fp, fprog->len); + ret = bpf_check_classic(fp, fprog->len); if (ret) goto free_prog; @@ -383,8 +383,8 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) if (ret) goto free_prog; - /* Convert 'sock_filter' insns to 'sock_filter_int' insns */ - ret = sk_convert_filter(fp, fprog->len, NULL, &new_len); + /* Convert 'sock_filter' insns to 'bpf_insn' insns */ + ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len); if (ret) goto free_prog; @@ -395,12 +395,12 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(sk_filter_size(new_len), + filter->prog = kzalloc(bpf_prog_size(new_len), GFP_KERNEL|__GFP_NOWARN); if (!filter->prog) goto free_filter; - ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); + ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); if (ret) goto free_filter_prog; kfree(fp); @@ -408,7 +408,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) atomic_set(&filter->usage, 1); filter->prog->len = new_len; - sk_filter_select_runtime(filter->prog); + bpf_prog_select_runtime(filter->prog); return filter; @@ -509,7 +509,7 @@ void get_seccomp_filter(struct task_struct *tsk) static inline void seccomp_filter_free(struct seccomp_filter *filter) { if (filter) { - sk_filter_free(filter->prog); + bpf_prog_free(filter->prog); kfree(filter); } } diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 653cbbd9e7ad..e4ba9a5a5ccb 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -522,6 +522,7 @@ static const struct bin_table bin_net_ipv6_conf_var_table[] = { { CTL_INT, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" }, { CTL_INT, NET_IPV6_PROXY_NDP, "proxy_ndp" }, { CTL_INT, NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" }, + { CTL_INT, NET_IPV6_ACCEPT_RA_FROM_LOCAL, "accept_ra_from_local" }, {} }; |