From 7b146cebe30cb481b0f70d85779da938da818637 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 27 Feb 2019 12:59:24 -0800 Subject: bpf: Sysctl hook Containerized applications may run as root and it may create problems for whole host. Specifically such applications may change a sysctl and affect applications in other containers. Furthermore in existing infrastructure it may not be possible to just completely disable writing to sysctl, instead such a process should be gradual with ability to log what sysctl are being changed by a container, investigate, limit the set of writable sysctl to currently used ones (so that new ones can not be changed) and eventually reduce this set to zero. The patch introduces new program type BPF_PROG_TYPE_CGROUP_SYSCTL and attach type BPF_CGROUP_SYSCTL to solve these problems on cgroup basis. New program type has access to following minimal context: struct bpf_sysctl { __u32 write; }; Where @write indicates whether sysctl is being read (= 0) or written (= 1). Helpers to access sysctl name and value will be introduced separately. BPF_CGROUP_SYSCTL attach point is added to sysctl code right before passing control to ctl_table->proc_handler so that BPF program can either allow or deny access to sysctl. Suggested-by: Roman Gushchin Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup.h | 18 ++++++++++++++++++ include/linux/bpf_types.h | 1 + include/linux/filter.h | 8 ++++++++ 3 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a4c644c1c091..b1c45da20a26 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -17,6 +17,8 @@ struct bpf_map; struct bpf_prog; struct bpf_sock_ops_kern; struct bpf_cgroup_storage; +struct ctl_table; +struct ctl_table_header; #ifdef CONFIG_CGROUP_BPF @@ -109,6 +111,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); +int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, + struct ctl_table *table, int write, + enum bpf_attach_type type); + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -253,6 +259,17 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, \ __ret; \ }) + + +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ + BPF_CGROUP_SYSCTL); \ + __ret; \ +}) + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -321,6 +338,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 08bf2f1fe553..d26991a16894 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -28,6 +28,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl) #endif #ifdef CONFIG_BPF_LIRC_MODE2 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) diff --git a/include/linux/filter.h b/include/linux/filter.h index 6074aa064b54..a17732057880 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -33,6 +33,8 @@ struct bpf_prog_aux; struct xdp_rxq_info; struct xdp_buff; struct sock_reuseport; +struct ctl_table; +struct ctl_table_header; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function @@ -1177,4 +1179,10 @@ struct bpf_sock_ops_kern { */ }; +struct bpf_sysctl_kern { + struct ctl_table_header *head; + struct ctl_table *table; + int write; +}; + #endif /* __LINUX_FILTER_H__ */ -- cgit v1.2.3 From 1d11b3016cec4ed9770b98e82a61708c8f4926e7 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 28 Feb 2019 19:22:15 -0800 Subject: bpf: Introduce bpf_sysctl_get_current_value helper Add bpf_sysctl_get_current_value() helper to copy current sysctl value into provided by BPF_PROG_TYPE_CGROUP_SYSCTL program buffer. It provides same string as user space can see by reading corresponding file in /proc/sys/, including new line, etc. Documentation for the new helper is provided in bpf.h UAPI. Since current value is kept in ctl_table->data in a parsed form, ctl_table->proc_handler() with write=0 is called to read that data and convert it to a string. Such a string can later be parsed by a program using helpers that will be introduced separately. Unfortunately it's not trivial to provide API to access parsed data due to variety of data representations (string, intvec, uintvec, ulongvec, custom structures, even NULL, etc). Instead it's assumed that user know how to handle specific sysctl they're interested in and appropriate helpers can be used. Since ctl_table->proc_handler() expects __user buffer, conversion to __user happens for kernel allocated one where the value is stored. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index a17732057880..f254ff92819f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1182,6 +1182,8 @@ struct bpf_sock_ops_kern { struct bpf_sysctl_kern { struct ctl_table_header *head; struct ctl_table *table; + void *cur_val; + size_t cur_len; int write; }; -- cgit v1.2.3 From 4e63acdff864654cee0ac5aaeda3913798ee78f6 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 7 Mar 2019 18:38:43 -0800 Subject: bpf: Introduce bpf_sysctl_{get,set}_new_value helpers Add helpers to work with new value being written to sysctl by user space. bpf_sysctl_get_new_value() copies value being written to sysctl into provided buffer. bpf_sysctl_set_new_value() overrides new value being written by user space with a one from provided buffer. Buffer should contain string representation of the value, similar to what can be seen in /proc/sys/. Both helpers can be used only on sysctl write. File position matters and can be managed by an interface that will be introduced separately. E.g. if user space calls sys_write to a file in /proc/sys/ at file position = X, where X > 0, then the value set by bpf_sysctl_set_new_value() will be written starting from X. If program wants to override whole value with specified buffer, file position has to be set to zero. Documentation for the new helpers is provided in bpf.h UAPI. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup.h | 8 +++++--- include/linux/filter.h | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index b1c45da20a26..1e97271f9a10 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -113,7 +113,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - enum bpf_attach_type type); + void __user *buf, size_t *pcount, + void **new_buf, enum bpf_attach_type type); static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) @@ -261,11 +262,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \ +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, nbuf) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ + buf, count, nbuf, \ BPF_CGROUP_SYSCTL); \ __ret; \ }) @@ -338,7 +340,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,nbuf) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/filter.h b/include/linux/filter.h index f254ff92819f..a23653f9460c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1184,6 +1184,9 @@ struct bpf_sysctl_kern { struct ctl_table *table; void *cur_val; size_t cur_len; + void *new_val; + size_t new_len; + int new_updated; int write; }; -- cgit v1.2.3 From e1550bfe0de47e30484ba91de1e50a91ec1c31f5 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 7 Mar 2019 18:50:52 -0800 Subject: bpf: Add file_pos field to bpf_sysctl ctx Add file_pos field to bpf_sysctl context to read and write sysctl file position at which sysctl is being accessed (read or written). The field can be used to e.g. override whole sysctl value on write to sysctl even when sys_write is called by user space with file_pos > 0. Or BPF program may reject such accesses. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/bpf-cgroup.h | 9 +++++---- include/linux/filter.h | 3 +++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 1e97271f9a10..cb3c6b3b89c8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -114,7 +114,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, void __user *buf, size_t *pcount, - void **new_buf, enum bpf_attach_type type); + loff_t *ppos, void **new_buf, + enum bpf_attach_type type); static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) @@ -262,12 +263,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, nbuf) \ +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ - buf, count, nbuf, \ + buf, count, pos, nbuf, \ BPF_CGROUP_SYSCTL); \ __ret; \ }) @@ -340,7 +341,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,nbuf) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/filter.h b/include/linux/filter.h index a23653f9460c..fb0edad75971 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1188,6 +1188,9 @@ struct bpf_sysctl_kern { size_t new_len; int new_updated; int write; + loff_t *ppos; + /* Temporary "register" for indirect stores to ppos. */ + u64 tmp_reg; }; #endif /* __LINUX_FILTER_H__ */ -- cgit v1.2.3 From 57c3bb725a3dd97d960d7e1cd0845d88de53217f Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Mon, 18 Mar 2019 16:57:10 -0700 Subject: bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types Currently the way to pass result from BPF helper to BPF program is to provide memory area defined by pointer and size: func(void *, size_t). It works great for generic use-case, but for simple types, such as int, it's overkill and consumes two arguments when it could use just one. Introduce new argument types ARG_PTR_TO_INT and ARG_PTR_TO_LONG to be able to pass result from helper to program via pointer to int and long correspondingly: func(int *) or func(long *). New argument types are similar to ARG_PTR_TO_MEM with the following differences: * they don't require corresponding ARG_CONST_SIZE argument, predefined access sizes are used instead (32bit for int, 64bit for long); * it's possible to use more than one such an argument in a helper; * provided pointers have to be aligned. It's easy to introduce similar ARG_PTR_TO_CHAR and ARG_PTR_TO_SHORT argument types. It's not done due to lack of use-case though. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e4d4c1771ab0..fd06ada941ad 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -202,6 +202,8 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ + ARG_PTR_TO_INT, /* pointer to int */ + ARG_PTR_TO_LONG, /* pointer to long */ }; /* type of values returned from helper functions */ -- cgit v1.2.3 From d7a4cb9b6705a89937d12c8158a35a3145dc967a Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Mon, 18 Mar 2019 17:55:26 -0700 Subject: bpf: Introduce bpf_strtol and bpf_strtoul helpers Add bpf_strtol and bpf_strtoul to convert a string to long and unsigned long correspondingly. It's similar to user space strtol(3) and strtoul(3) with a few changes to the API: * instead of NUL-terminated C string the helpers expect buffer and buffer length; * resulting long or unsigned long is returned in a separate result-argument; * return value is used to indicate success or failure, on success number of consumed bytes is returned that can be used to identify position to read next if the buffer is expected to contain multiple integers; * instead of *base* argument, *flags* is used that provides base in 5 LSB, other bits are reserved for future use; * number of supported bases is limited. Documentation for the new helpers is provided in bpf.h UAPI. The helpers are made available to BPF_PROG_TYPE_CGROUP_SYSCTL programs to be able to convert string input to e.g. "ulongvec" output. E.g. "net/ipv4/tcp_mem" consists of three ulong integers. They can be parsed by calling to bpf_strtoul three times. Implementation notes: Implementation includes "../../lib/kstrtox.h" to reuse integer parsing functions. It's done exactly same way as fs/proc/base.c already does. Unfortunately existing kstrtoX function can't be used directly since they fail if any invalid character is present right after integer in the string. Existing simple_strtoX functions can't be used either since they're obsolete and don't handle overflow properly. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fd06ada941ad..f15432d90728 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -989,6 +989,8 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto; extern const struct bpf_func_proto bpf_spin_lock_proto; extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; +extern const struct bpf_func_proto bpf_strtol_proto; +extern const struct bpf_func_proto bpf_strtoul_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); -- cgit v1.2.3 From ba0509b6881efd0c8b26c36490cba87d8fb324c0 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 12 Apr 2019 17:07:37 +0200 Subject: net: core: introduce build_skb_around The function build_skb() also have the responsibility to allocate and clear the SKB structure. Introduce a new function build_skb_around(), that moves the responsibility of allocation and clearing to the caller. This allows caller to use kmem_cache (slab/slub) bulk allocation API. Next patch use this function combined with kmem_cache_alloc_bulk. Signed-off-by: Jesper Dangaard Brouer Acked-by: Song Liu Acked-by: Eric Dumazet Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a06275a618f0..e81f2b0e8a83 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1042,6 +1042,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); +struct sk_buff *build_skb_around(struct sk_buff *skb, + void *data, unsigned int frag_size); /** * alloc_skb - allocate a network buffer -- cgit v1.2.3 From 7df737e991069d75eec1ded1c8b37e81b8c54df9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 19 Apr 2019 07:44:54 -0700 Subject: bpf: remove global variables Move three global variables protected by bpf_verifier_lock into 'struct bpf_verifier_env' to allow parallel verification. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b3ab61fe1932..1305ccbd8fe6 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -295,6 +295,11 @@ struct bpf_verifier_env { const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct { + int *insn_state; + int *insn_stack; + int cur_stack; + } cfg; u32 subprog_cnt; /* number of instructions analyzed by the verifier */ u32 insn_processed; -- cgit v1.2.3