From 0456ea170cd665ddbb9503be92e39f96055dd5fa Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 20 Apr 2020 10:46:10 -0700 Subject: bpf: Enable more helpers for BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT} Currently the following prog types don't fall back to bpf_base_func_proto() (instead they have cgroup_base_func_proto which has a limited set of helpers from bpf_base_func_proto): * BPF_PROG_TYPE_CGROUP_DEVICE * BPF_PROG_TYPE_CGROUP_SYSCTL * BPF_PROG_TYPE_CGROUP_SOCKOPT I don't see any specific reason why we shouldn't use bpf_base_func_proto(), every other type of program (except bpf-lirc and, understandably, tracing) use it, so let's fall back to bpf_base_func_proto for those prog types as well. This basically boils down to adding access to the following helpers: * BPF_FUNC_get_prandom_u32 * BPF_FUNC_get_smp_processor_id * BPF_FUNC_get_numa_node_id * BPF_FUNC_tail_call * BPF_FUNC_ktime_get_ns * BPF_FUNC_spin_lock (CAP_SYS_ADMIN) * BPF_FUNC_spin_unlock (CAP_SYS_ADMIN) * BPF_FUNC_jiffies64 (CAP_SYS_ADMIN) I've also added bpf_perf_event_output() because it's really handy for logging and debugging. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200420174610.77494-1-sdf@google.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fd2b2322412d..25da6ff2a880 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1523,6 +1523,7 @@ extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; +extern const struct bpf_func_proto bpf_event_output_data_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From 6890896bd765b0504761c61901c9804fca23bfb2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 24 Apr 2020 16:59:41 -0700 Subject: bpf: Fix missing bpf_base_func_proto in cgroup_base_func_proto for CGROUP_NET=n linux-next build bot reported compile issue [1] with one of its configs. It looks like when we have CONFIG_NET=n and CONFIG_BPF{,_SYSCALL}=y, we are missing the bpf_base_func_proto definition (from net/core/filter.c) in cgroup_base_func_proto. I'm reshuffling the code a bit to make it work. The common helpers are moved into kernel/bpf/helpers.c and the bpf_base_func_proto is exported from there. Also, bpf_get_raw_cpu_id goes into kernel/bpf/core.c akin to existing bpf_user_rnd_u32. [1] https://lore.kernel.org/linux-next/CAKH8qBsBvKHswiX1nx40LgO+BGeTmb1NX8tiTttt_0uu6T3dCA@mail.gmail.com/T/#mff8b0c083314c68c2e2ef0211cb11bc20dc13c72 Fixes: 0456ea170cd6 ("bpf: Enable more helpers for BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT}") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Cc: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200424235941.58382-1-sdf@google.com --- include/linux/bpf.h | 8 ++++++++ include/linux/filter.h | 2 -- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 25da6ff2a880..5147e11e53ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1215,6 +1215,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, struct bpf_prog *bpf_prog_by_id(u32 id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1365,6 +1366,12 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) { return ERR_PTR(-ENOTSUPP); } + +static inline const struct bpf_func_proto * +bpf_base_func_proto(enum bpf_func_id func_id) +{ + return NULL; +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -1531,6 +1538,7 @@ const struct bpf_func_proto *bpf_tracing_func_proto( /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #if defined(CONFIG_NET) bool bpf_sock_common_is_valid_access(int off, int size, diff --git a/include/linux/filter.h b/include/linux/filter.h index 9b5aa5c483cc..af37318bb1c5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -863,8 +863,6 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); -const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); -- cgit v1.2.3 From 71d19214776e61b33da48f7c1b46e522c7f78221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sun, 26 Apr 2020 09:15:25 -0700 Subject: bpf: add bpf_ktime_get_boot_ns() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a device like a cellphone which is constantly suspending and resuming CLOCK_MONOTONIC is not particularly useful for keeping track of or reacting to external network events. Instead you want to use CLOCK_BOOTTIME. Hence add bpf_ktime_get_boot_ns() as a mirror of bpf_ktime_get_ns() based around CLOCK_BOOTTIME instead of CLOCK_MONOTONIC. Signed-off-by: Maciej Żenczykowski Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5147e11e53ff..10960cfabea4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1509,6 +1509,7 @@ extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; +extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; -- cgit v1.2.3 From 26363af5643490a817272e1cc6f1d3f1d550a699 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:35 +0200 Subject: mm: remove watermark_boost_factor_sysctl_handler watermark_boost_factor_sysctl_handler is just a pointless wrapper for proc_dointvec_minmax, so remove it and use proc_dointvec_minmax directly. Signed-off-by: Christoph Hellwig Acked-by: David Rientjes Signed-off-by: Al Viro --- include/linux/mmzone.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1b9de7d220fb..f37bb8f187fc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -911,8 +911,6 @@ static inline int is_highmem(struct zone *zone) struct ctl_table; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int watermark_boost_factor_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; -- cgit v1.2.3 From 2374c09b1c8a883bb9b4b2fc3756703eeb618f4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:36 +0200 Subject: sysctl: remove all extern declaration from sysctl.c Extern declarations in .c files are a bad style and can lead to mismatches. Use existing definitions in headers where they exist, and otherwise move the external declarations to suitable header files. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/coredump.h | 4 ++++ include/linux/file.h | 2 ++ include/linux/mm.h | 2 ++ include/linux/mmzone.h | 2 ++ include/linux/pid.h | 3 +++ include/linux/sysctl.h | 8 ++++++++ 6 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index abf4b4e65dbb..7a899e83835d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -22,4 +22,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #endif +extern int core_uses_pid; +extern char core_pattern[]; +extern unsigned int core_pipe_limit; + #endif /* _LINUX_COREDUMP_H */ diff --git a/include/linux/file.h b/include/linux/file.h index 142d102f285e..122f80084a3e 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -94,4 +94,6 @@ extern void fd_install(unsigned int fd, struct file *file); extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); +extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max; + #endif /* __LINUX_FILE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a323422d783..9c4e7e76dedd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3140,5 +3140,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, pgoff_t first_index, pgoff_t nr); #endif +extern int sysctl_nr_trim_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f37bb8f187fc..b2af594ef0f7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -909,6 +909,7 @@ static inline int is_highmem(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; + int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, @@ -925,6 +926,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, extern int numa_zonelist_order_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +extern int percpu_pagelist_fraction; extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 diff --git a/include/linux/pid.h b/include/linux/pid.h index cc896f0fc4e3..93543cbc0e6b 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -108,6 +108,9 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new, struct pid_namespace; extern struct pid_namespace init_pid_ns; +extern int pid_max; +extern int pid_max_min, pid_max_max; + /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 02fa84493f23..36143ca40b56 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -207,7 +207,15 @@ void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +extern int pwrsw_enabled; +extern int unaligned_enabled; +extern int unaligned_dump_stack; +extern int no_unaligned_warning; + extern struct ctl_table sysctl_mount_point[]; +extern struct ctl_table random_table[]; +extern struct ctl_table firmware_config_table[]; +extern struct ctl_table epoll_table[]; #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) -- cgit v1.2.3 From 32927393dc1ccd60fb2bdc05b9e8e88753761469 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:38 +0200 Subject: sysctl: pass kernel pointers to ->proc_handler Instead of having all the sysctl handlers deal with user pointers, which is rather hairy in terms of the BPF interaction, copy the input to and from userspace in common code. This also means that the strings are always NUL-terminated by the common code, making the API a little bit safer. As most handler just pass through the data to one of the common handlers a lot of the changes are mechnical. Signed-off-by: Christoph Hellwig Acked-by: Andrey Ignatov Signed-off-by: Al Viro --- include/linux/bpf-cgroup.h | 9 ++++---- include/linux/compaction.h | 2 +- include/linux/fs.h | 6 ++--- include/linux/ftrace.h | 3 +-- include/linux/hugetlb.h | 15 ++++++------- include/linux/kprobes.h | 2 +- include/linux/latencytop.h | 4 ++-- include/linux/mm.h | 12 +++++----- include/linux/mmzone.h | 23 +++++++++---------- include/linux/nmi.h | 15 +++++-------- include/linux/perf_event.h | 13 +++++------ include/linux/printk.h | 2 +- include/linux/sched/sysctl.h | 44 +++++++++++++----------------------- include/linux/security.h | 2 +- include/linux/sysctl.h | 53 ++++++++++++++++++-------------------------- include/linux/timer.h | 3 +-- include/linux/vmstat.h | 8 +++---- include/linux/writeback.h | 28 +++++++++-------------- 18 files changed, 101 insertions(+), 143 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c11b413d5b1a..0b41fd5fc96b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -138,8 +138,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - void __user *buf, size_t *pcount, - loff_t *ppos, void **new_buf, + void **buf, size_t *pcount, loff_t *ppos, enum bpf_attach_type type); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, @@ -302,12 +301,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \ +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ - buf, count, pos, nbuf, \ + buf, count, pos, \ BPF_CGROUP_SYSCTL); \ __ret; \ }) @@ -429,7 +428,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 4b898cdbdf05..a0eabfbeb0e1 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -86,7 +86,7 @@ static inline unsigned long compact_gap(unsigned int order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos); + void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..9b028d260649 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3536,11 +3536,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int proc_nr_dentry(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int proc_nr_inodes(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); #define __FMODE_EXEC ((__force int) FMODE_EXEC) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db95244a62d4..ddfc377de0d2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1005,8 +1005,7 @@ extern void disable_trace_on_warning(void); extern int __disable_trace_on_warning; int tracepoint_printk_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #else /* CONFIG_TRACING */ static inline void disable_trace_on_warning(void) { } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 43a1cef8f0f1..92c21c5ccc58 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -105,14 +105,13 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, void hugepage_put_subpool(struct hugepage_subpool *spool); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); - -#ifdef CONFIG_NUMA -int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -#endif +int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); +int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 04bdaf01112c..594265bfd390 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -312,7 +312,7 @@ DEFINE_INSN_CACHE_OPS(optinsn); #ifdef CONFIG_SYSCTL extern int sysctl_kprobes_optimization; extern int proc_kprobes_optimization_handler(struct ctl_table *table, - int write, void __user *buffer, + int write, void *buffer, size_t *length, loff_t *ppos); #endif extern void wait_for_kprobe_optimizer(void); diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index 9022f0c2e2e4..abe3d95f795b 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -38,8 +38,8 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter) void clear_tsk_latency_tracing(struct task_struct *p); -extern int sysctl_latencytop(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int sysctl_latencytop(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 9c4e7e76dedd..a7b1ef8ed970 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -201,10 +201,10 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; -extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *, - size_t *, loff_t *); -extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, - size_t *, loff_t *); +int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) @@ -2957,8 +2957,8 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); #ifdef CONFIG_SYSCTL extern int sysctl_drop_caches; -int drop_caches_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); #endif void drop_slab(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b2af594ef0f7..93cf20f41e26 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -910,22 +910,21 @@ static inline int is_highmem(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *, + size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *, + size_t *, loff_t *); int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); - -extern int numa_zonelist_order_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); +int numa_zonelist_order_handler(struct ctl_table *, int, + void *, size_t *, loff_t *); extern int percpu_pagelist_fraction; extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 9003e29cde46..750c7f395ca9 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -202,16 +202,11 @@ static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif struct ctl_table; -extern int proc_watchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_nmi_watchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_soft_watchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_watchdog_thresh(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_watchdog_cpumask(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9c3e7619c929..347ea379622a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1280,15 +1280,12 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -extern int perf_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - +int perf_proc_update_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); int perf_event_max_stack_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); /* Access to perf_event_open(2) syscall. */ #define PERF_SECURITY_OPEN 0 diff --git a/include/linux/printk.h b/include/linux/printk.h index e061635e0409..fcde0772ec98 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -189,7 +189,7 @@ extern int printk_delay_msec; extern int dmesg_restrict; extern int -devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void __user *buf, +devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos); extern void wake_up_klogd(void); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d4f6215ee03f..7b4d3a49b6c5 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -12,9 +12,8 @@ extern unsigned int sysctl_hung_task_panic; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_check_interval_secs; extern int sysctl_hung_task_warnings; -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); +int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); #else /* Avoid need for ifdefs elsewhere in the code */ enum { sysctl_hung_task_timeout_secs = 0 }; @@ -43,8 +42,7 @@ extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_nr_migrate; int sched_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, - loff_t *ppos); + void *buffer, size_t *length, loff_t *ppos); #endif /* @@ -72,33 +70,21 @@ extern unsigned int sysctl_sched_autogroup_enabled; extern int sysctl_sched_rr_timeslice; extern int sched_rr_timeslice; -extern int sched_rr_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -extern int sched_rt_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -#ifdef CONFIG_UCLAMP_TASK -extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -#endif - -extern int sysctl_numa_balancing(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -extern int sysctl_schedstats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int sched_rr_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sched_rt_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) extern unsigned int sysctl_sched_energy_aware; -extern int sched_energy_aware_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int sched_energy_aware_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); #endif #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/include/linux/security.h b/include/linux/security.h index a8d9310472df..6aa229b252ce 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -211,7 +211,7 @@ struct request_sock; #ifdef CONFIG_MMU extern int mmap_min_addr_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #endif /* security_inode_init_security callback function to write xattrs */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 36143ca40b56..f2401e45a3c2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -44,35 +44,26 @@ struct ctl_dir; extern const int sysctl_vals[]; -typedef int proc_handler (struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - -extern int proc_dostring(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_douintvec(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_douintvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int proc_dointvec_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_doulongvec_minmax(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - void __user *, size_t *, loff_t *); -extern int proc_do_large_bitmap(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_do_static_key(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos); + +int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, + loff_t *); +int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *, + loff_t *); +int proc_doulongvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *, + size_t *, loff_t *); +int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_do_static_key(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); /* * Register a set of sysctl names by calling register_sysctl_table @@ -246,7 +237,7 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, #endif /* CONFIG_SYSCTL */ -int sysctl_max_threads(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #endif /* _LINUX_SYSCTL_H */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 0dc19a8c39c9..07910ae5ddd9 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -201,8 +201,7 @@ struct ctl_table; extern unsigned int sysctl_timer_migration; int timer_migration_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #endif unsigned long __round_jiffies(unsigned long j, int cpu); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 292485f3d24d..cb507151710f 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -16,8 +16,8 @@ extern int sysctl_stat_interval; #define DISABLE_NUMA_STAT 0 extern int sysctl_vm_numa_stat; DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); -extern int sysctl_vm_numa_stat_handler(struct ctl_table *table, - int write, void __user *buffer, size_t *length, loff_t *ppos); +int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, + void *buffer, size_t *length, loff_t *ppos); #endif struct reclaim_stat { @@ -274,8 +274,8 @@ void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); struct ctl_table; -int vmstat_refresh(struct ctl_table *, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp, + loff_t *ppos); void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a19d845dd7eb..f8a7e1a850fb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -362,24 +362,18 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; -extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_bytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int dirty_background_ratio_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_background_bytes_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_ratio_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_bytes_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); int dirtytime_interval_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - -struct ctl_table; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); -- cgit v1.2.3 From f9d041271cf44ca02eed0cc82e1a6d8c814c53ed Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:05 -0700 Subject: bpf: Refactor bpf_link update handling Make bpf_link update support more generic by making it into another bpf_link_ops methods. This allows generic syscall handling code to be agnostic to various conditionally compiled features (e.g., the case of CONFIG_CGROUP_BPF). This also allows to keep link type-specific code to remain static within respective code base. Refactor existing bpf_cgroup_link code and take advantage of this. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-2-andriin@fb.com --- include/linux/bpf-cgroup.h | 12 ------------ include/linux/bpf.h | 3 ++- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 0b41fd5fc96b..a9cb9a5bf8e9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -100,8 +100,6 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_cgroup_link *link, enum bpf_attach_type type); -int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, - struct bpf_prog *new_prog); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -112,8 +110,6 @@ int cgroup_bpf_attach(struct cgroup *cgrp, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); -int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog, - struct bpf_prog *new_prog); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -353,7 +349,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, #else struct bpf_prog; -struct bpf_link; struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} @@ -377,13 +372,6 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, return -EINVAL; } -static inline int cgroup_bpf_replace(struct bpf_link *link, - struct bpf_prog *old_prog, - struct bpf_prog *new_prog) -{ - return -EINVAL; -} - static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 10960cfabea4..81c8620cb4c4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1093,7 +1093,8 @@ struct bpf_link { struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); - + int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog); }; void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, -- cgit v1.2.3 From a3b80e1078943dc12553166fb08e258463dec013 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:06 -0700 Subject: bpf: Allocate ID for bpf_link Generate ID for each bpf_link using IDR, similarly to bpf_map and bpf_prog. bpf_link creation, initialization, attachment, and exposing to user-space through FD and ID is a complicated multi-step process, abstract it away through bpf_link_primer and bpf_link_prime(), bpf_link_settle(), and bpf_link_cleanup() internal API. They guarantee that until bpf_link is properly attached, user-space won't be able to access partially-initialized bpf_link either from FD or ID. All this allows to simplify bpf_link attachment and error handling code. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-3-andriin@fb.com --- include/linux/bpf.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 81c8620cb4c4..875d1f0af803 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1085,11 +1085,19 @@ int bpf_prog_new_fd(struct bpf_prog *prog); struct bpf_link { atomic64_t refcnt; + u32 id; const struct bpf_link_ops *ops; struct bpf_prog *prog; struct work_struct work; }; +struct bpf_link_primer { + struct bpf_link *link; + struct file *file; + int fd; + u32 id; +}; + struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); @@ -1097,10 +1105,11 @@ struct bpf_link_ops { struct bpf_prog *old_prog); }; -void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, - struct bpf_prog *prog); -void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, - int link_fd); +void bpf_link_init(struct bpf_link *link, + const struct bpf_link_ops *ops, struct bpf_prog *prog); +int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); +int bpf_link_settle(struct bpf_link_primer *primer); +void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -- cgit v1.2.3 From f2e10bff16a0fdd41ba278c84da9813700e356af Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:08 -0700 Subject: bpf: Add support for BPF_OBJ_GET_INFO_BY_FD for bpf_link Add ability to fetch bpf_link details through BPF_OBJ_GET_INFO_BY_FD command. Also enhance show_fdinfo to potentially include bpf_link type-specific information (similarly to obj_info). Also introduce enum bpf_link_type stored in bpf_link itself and expose it in UAPI. bpf_link_tracing also now will store and return bpf_attach_type. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-5-andriin@fb.com --- include/linux/bpf-cgroup.h | 2 -- include/linux/bpf.h | 8 +++++++- include/linux/bpf_types.h | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a9cb9a5bf8e9..272626cc3fc9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -57,8 +57,6 @@ struct bpf_cgroup_link { enum bpf_attach_type type; }; -extern const struct bpf_link_ops bpf_cgroup_link_lops; - struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 875d1f0af803..c07b1d2f3824 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1026,9 +1026,11 @@ extern const struct file_operations bpf_prog_fops; extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ extern const struct bpf_map_ops _ops; +#define BPF_LINK_TYPE(_id, _name) #include #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; @@ -1086,6 +1088,7 @@ int bpf_prog_new_fd(struct bpf_prog *prog); struct bpf_link { atomic64_t refcnt; u32 id; + enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; struct work_struct work; @@ -1103,9 +1106,12 @@ struct bpf_link_ops { void (*dealloc)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); + void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); + int (*fill_link_info)(const struct bpf_link *link, + struct bpf_link_info *info); }; -void bpf_link_init(struct bpf_link *link, +void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index ba0c2d56f8a3..8345cdf553b8 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -118,3 +118,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) #if defined(CONFIG_BPF_JIT) BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif + +BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) +#ifdef CONFIG_CGROUP_BPF +BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) +#endif -- cgit v1.2.3 From d46edd671a147032e22cfeb271a5734703093649 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 30 Apr 2020 00:15:04 -0700 Subject: bpf: Sharing bpf runtime stats with BPF_ENABLE_STATS Currently, sysctl kernel.bpf_stats_enabled controls BPF runtime stats. Typical userspace tools use kernel.bpf_stats_enabled as follows: 1. Enable kernel.bpf_stats_enabled; 2. Check program run_time_ns; 3. Sleep for the monitoring period; 4. Check program run_time_ns again, calculate the difference; 5. Disable kernel.bpf_stats_enabled. The problem with this approach is that only one userspace tool can toggle this sysctl. If multiple tools toggle the sysctl at the same time, the measurement may be inaccurate. To fix this problem while keep backward compatibility, introduce a new bpf command BPF_ENABLE_STATS. On success, this command enables stats and returns a valid fd. BPF_ENABLE_STATS takes argument "type". Currently, only one type, BPF_STATS_RUN_TIME, is supported. We can extend the command to support other types of stats in the future. With BPF_ENABLE_STATS, user space tool would have the following flow: 1. Get a fd with BPF_ENABLE_STATS, and make sure it is valid; 2. Check program run_time_ns; 3. Sleep for the monitoring period; 4. Check program run_time_ns again, calculate the difference; 5. Close the fd. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200430071506.1408910-2-songliubraving@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c07b1d2f3824..1262ec460ab3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -987,6 +987,7 @@ _out: \ #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +extern struct mutex bpf_stats_enabled_mutex; /* * Block execution of BPF programs attached to instrumentation (perf, -- cgit v1.2.3