diff options
Diffstat (limited to 'include/linux')
43 files changed, 1118 insertions, 1076 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 88a51b242adc..669d96d074ad 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -225,24 +225,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ ({ \ - u32 __unused_flags; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, \ - &__unused_flags); \ + NULL, NULL); \ __ret; \ }) #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ ({ \ - u32 __unused_flags; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, \ - &__unused_flags); \ + t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ecc3d3ec41cf..2b914a56a2c5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/percpu-refcount.h> #include <linux/bpfptr.h> +#include <linux/btf.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -88,6 +89,7 @@ struct bpf_map_ops { int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); int (*map_pop_elem)(struct bpf_map *map, void *value); int (*map_peek_elem)(struct bpf_map *map, void *value); + void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -147,14 +149,48 @@ struct bpf_map_ops { bpf_callback_t callback_fn, void *callback_ctx, u64 flags); - /* BTF name and id of struct allocated by map_alloc */ - const char * const map_btf_name; + /* BTF id of struct allocated by map_alloc */ int *map_btf_id; /* bpf_iter info used to open a seq_file */ const struct bpf_iter_seq_info *iter_seq_info; }; +enum { + /* Support at most 8 pointers in a BPF map value */ + BPF_MAP_VALUE_OFF_MAX = 8, + BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX + + 1 + /* for bpf_spin_lock */ + 1, /* for bpf_timer */ +}; + +enum bpf_kptr_type { + BPF_KPTR_UNREF, + BPF_KPTR_REF, +}; + +struct bpf_map_value_off_desc { + u32 offset; + enum bpf_kptr_type type; + struct { + struct btf *btf; + struct module *module; + btf_dtor_kfunc_t dtor; + u32 btf_id; + } kptr; +}; + +struct bpf_map_value_off { + u32 nr_off; + struct bpf_map_value_off_desc off[]; +}; + +struct bpf_map_off_arr { + u32 cnt; + u32 field_off[BPF_MAP_OFF_ARR_MAX]; + u8 field_sz[BPF_MAP_OFF_ARR_MAX]; +}; + struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -171,6 +207,7 @@ struct bpf_map { u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ + struct bpf_map_value_off *kptr_off_tab; int timer_off; /* >=0 valid offset, <0 error */ u32 id; int numa_node; @@ -182,10 +219,7 @@ struct bpf_map { struct mem_cgroup *memcg; #endif char name[BPF_OBJ_NAME_LEN]; - bool bypass_spec_v1; - bool frozen; /* write-once; write-protected by freeze_mutex */ - /* 14 bytes hole */ - + struct bpf_map_off_arr *off_arr; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ @@ -205,6 +239,8 @@ struct bpf_map { bool jited; bool xdp_has_frags; } owner; + bool bypass_spec_v1; + bool frozen; /* write-once; write-protected by freeze_mutex */ }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -217,43 +253,44 @@ static inline bool map_value_has_timer(const struct bpf_map *map) return map->timer_off >= 0; } +static inline bool map_value_has_kptrs(const struct bpf_map *map) +{ + return !IS_ERR_OR_NULL(map->kptr_off_tab); +} + static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { if (unlikely(map_value_has_spin_lock(map))) memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); if (unlikely(map_value_has_timer(map))) memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); + if (unlikely(map_value_has_kptrs(map))) { + struct bpf_map_value_off *tab = map->kptr_off_tab; + int i; + + for (i = 0; i < tab->nr_off; i++) + *(u64 *)(dst + tab->off[i].offset) = 0; + } } /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0; + u32 curr_off = 0; + int i; - if (unlikely(map_value_has_spin_lock(map))) { - s_off = map->spin_lock_off; - s_sz = sizeof(struct bpf_spin_lock); - } - if (unlikely(map_value_has_timer(map))) { - t_off = map->timer_off; - t_sz = sizeof(struct bpf_timer); + if (likely(!map->off_arr)) { + memcpy(dst, src, map->value_size); + return; } - if (unlikely(s_sz || t_sz)) { - if (s_off < t_off || !s_sz) { - swap(s_off, t_off); - swap(s_sz, t_sz); - } - memcpy(dst, src, t_off); - memcpy(dst + t_off + t_sz, - src + t_off + t_sz, - s_off - t_off - t_sz); - memcpy(dst + s_off + s_sz, - src + s_off + s_sz, - map->value_size - s_off - s_sz); - } else { - memcpy(dst, src, map->value_size); + for (i = 0; i < map->off_arr->cnt; i++) { + u32 next_off = map->off_arr->field_off[i]; + + memcpy(dst + curr_off, src + curr_off, next_off - curr_off); + curr_off += map->off_arr->field_sz[i]; } + memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); @@ -342,9 +379,31 @@ enum bpf_type_flag { */ MEM_PERCPU = BIT(4 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = MEM_PERCPU, + /* Indicates that the argument will be released. */ + OBJ_RELEASE = BIT(5 + BPF_BASE_TYPE_BITS), + + /* PTR is not trusted. This is only used with PTR_TO_BTF_ID, to mark + * unreferenced and referenced kptr loaded from map value using a load + * instruction, so that they can only be dereferenced but not escape the + * BPF program into the kernel (i.e. cannot be passed as arguments to + * kfunc or bpf helpers). + */ + PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), + + MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to memory local to the bpf program. */ + DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to a ringbuf record. */ + DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_FLAG_MAX, + __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) + /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -361,16 +420,11 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ - ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ - /* the following constraints used to prototype bpf_memcmp() and other - * functions that access data on eBPF program stack + /* Used to prototype bpf_memcmp() and other functions that access data + * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ - ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, - * helper function must fill all bytes or clear - * them in error case. - */ ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -391,6 +445,8 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ + ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -400,6 +456,11 @@ enum bpf_arg_type { ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, + ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + /* pointer to memory does not need to be initialized, helper function must fill + * all bytes or clear them in error case. + */ + ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -427,6 +488,7 @@ enum bpf_return_type { RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is @@ -672,15 +734,17 @@ struct btf_func_model { #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 - * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 + * bytes on x86. */ -#define BPF_MAX_TRAMP_PROGS 38 +#define BPF_MAX_TRAMP_LINKS 38 -struct bpf_tramp_progs { - struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; - int nr_progs; +struct bpf_tramp_links { + struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; + int nr_links; }; +struct bpf_tramp_run_ctx; + /* Different use cases for BPF trampoline: * 1. replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS @@ -704,13 +768,14 @@ struct bpf_tramp_progs { struct bpf_tramp_image; int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call); /* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog); -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -803,9 +868,10 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( { return bpf_func(ctx, insnsi); } + #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -856,12 +922,12 @@ int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; } -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; @@ -960,7 +1026,6 @@ struct bpf_prog_aux { bool tail_call_reachable; bool xdp_has_frags; bool use_bpf_prog_pack; - struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1047,6 +1112,19 @@ struct bpf_link_ops { struct bpf_link_info *info); }; +struct bpf_tramp_link { + struct bpf_link link; + struct hlist_node tramp_hlist; + u64 cookie; +}; + +struct bpf_tracing_link { + struct bpf_tramp_link link; + enum bpf_attach_type attach_type; + struct bpf_trampoline *trampoline; + struct bpf_prog *tgt_prog; +}; + struct bpf_link_primer { struct bpf_link *link; struct file *file; @@ -1084,8 +1162,8 @@ bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, + struct bpf_tramp_link *link, const struct btf_func_model *model, void *image, void *image_end); static inline bool bpf_try_module_get(const void *data, struct module *owner) @@ -1221,7 +1299,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, /* an array of programs to be executed under rcu_lock. * * Typical usage: - * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run); + * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run); * * the structure returned by bpf_prog_array_alloc() should be populated * with program pointers and the last pointer must be NULL. @@ -1290,6 +1368,12 @@ struct bpf_trace_run_ctx { u64 bpf_cookie; }; +struct bpf_tramp_run_ctx { + struct bpf_run_ctx run_ctx; + u64 bpf_cookie; + struct bpf_run_ctx *saved_run_ctx; +}; + static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) { struct bpf_run_ctx *old_ctx = NULL; @@ -1315,83 +1399,22 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); -static __always_inline int -BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog, - int retval, u32 *ret_flags) -{ - const struct bpf_prog_array_item *item; - const struct bpf_prog *prog; - const struct bpf_prog_array *array; - struct bpf_run_ctx *old_run_ctx; - struct bpf_cg_run_ctx run_ctx; - u32 func_ret; - - run_ctx.retval = retval; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); - item = &array->items[0]; - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - while ((prog = READ_ONCE(item->prog))) { - run_ctx.prog_item = item; - func_ret = run_prog(prog, ctx); - if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) - run_ctx.retval = -EPERM; - *(ret_flags) |= (func_ret >> 1); - item++; - } - bpf_reset_run_ctx(old_run_ctx); - rcu_read_unlock(); - migrate_enable(); - return run_ctx.retval; -} - -static __always_inline int -BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog, - int retval) -{ - const struct bpf_prog_array_item *item; - const struct bpf_prog *prog; - const struct bpf_prog_array *array; - struct bpf_run_ctx *old_run_ctx; - struct bpf_cg_run_ctx run_ctx; - - run_ctx.retval = retval; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); - item = &array->items[0]; - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - while ((prog = READ_ONCE(item->prog))) { - run_ctx.prog_item = item; - if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) - run_ctx.retval = -EPERM; - item++; - } - bpf_reset_run_ctx(old_run_ctx); - rcu_read_unlock(); - migrate_enable(); - return run_ctx.retval; -} - static __always_inline u32 -BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, +bpf_prog_run_array(const struct bpf_prog_array *array, const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; - const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; - migrate_disable(); - rcu_read_lock(); - array = rcu_dereference(array_rcu); + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held"); + if (unlikely(!array)) - goto out; + return ret; + + migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { @@ -1400,50 +1423,10 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, item++; } bpf_reset_run_ctx(old_run_ctx); -out: - rcu_read_unlock(); migrate_enable(); return ret; } -/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs - * so BPF programs can request cwr for TCP packets. - * - * Current cgroup skb programs can only return 0 or 1 (0 to drop the - * packet. This macro changes the behavior so the low order bit - * indicates whether the packet should be dropped (0) or not (1) - * and the next bit is a congestion notification bit. This could be - * used by TCP to call tcp_enter_cwr() - * - * Hence, new allowed return values of CGROUP EGRESS BPF programs are: - * 0: drop packet - * 1: keep packet - * 2: drop packet and cn - * 3: keep packet and cn - * - * This macro then converts it to one of the NET_XMIT or an error - * code that is then interpreted as drop packet (and no cn): - * 0: NET_XMIT_SUCCESS skb should be transmitted - * 1: NET_XMIT_DROP skb should be dropped and cn - * 2: NET_XMIT_CN skb should be transmitted and cn - * 3: -err skb should be dropped - */ -#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ - ({ \ - u32 _flags = 0; \ - bool _cn; \ - u32 _ret; \ - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ - _cn = _flags & BPF_RET_SET_CN; \ - if (_ret && !IS_ERR_VALUE((long)_ret)) \ - _ret = -EFAULT; \ - if (!_ret) \ - _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ - else \ - _ret = (_cn ? NET_XMIT_DROP : _ret); \ - _ret; \ - }) - #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -1497,6 +1480,12 @@ void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset); +void bpf_map_free_kptr_off_tab(struct bpf_map *map); +struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map); +bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b); +void bpf_map_free_kptrs(struct bpf_map *map, void *map_value); + struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -1590,6 +1579,7 @@ void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); +struct bpf_link *bpf_link_get_curr_or_next(u32 *id); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); @@ -1793,7 +1783,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, - const struct btf *need_btf, u32 need_type_id); + const struct btf *need_btf, u32 need_type_id, + bool strict); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, @@ -2208,6 +2199,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_map_push_elem_proto; extern const struct bpf_func_proto bpf_map_pop_elem_proto; extern const struct bpf_func_proto bpf_map_peek_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; @@ -2245,12 +2237,16 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; +extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto; extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; @@ -2270,6 +2266,7 @@ extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_strncmp_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; +extern const struct bpf_func_proto bpf_kptr_xchg_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2383,6 +2380,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); void *bpf_arch_text_copy(void *dst, void *src, size_t len); +int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); @@ -2393,4 +2391,33 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 **bin_buf, u32 num_args); void bpf_bprintf_cleanup(void); +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. + * + * The upper 8 bits are reserved. It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, +}; + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size); +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); +int bpf_dynptr_check_size(u32 size); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 493e63258497..7ea18d4da84b 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -143,9 +143,9 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem, - bool uncharge_omem); + bool uncharge_omem, bool use_trace_rcu); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem); +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3e24ad0c4b3c..2b9112b80171 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -141,3 +141,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) +BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3a9d2d7cc6b7..e8439f6cbe57 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -72,6 +72,18 @@ struct bpf_reg_state { u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + /* For dynptr stack slots */ + struct { + enum bpf_dynptr_type type; + /* A dynptr is 16 bytes so it takes up 2 stack slots. + * We need to track which slot is the first slot + * to protect against cases where the user may try to + * pass in an address starting at the second slot of the + * dynptr. + */ + bool first_slot; + } dynptr; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -88,6 +100,8 @@ struct bpf_reg_state { * for the purpose of tracking that it's freed. * For PTR_TO_SOCKET this is used to share which pointers retain the * same reference to the socket, to determine proper reference freeing. + * For stack slots that are dynptrs, this is used to track references to + * the dynptr to determine proper reference freeing. */ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned @@ -174,9 +188,15 @@ enum bpf_stack_slot_type { STACK_SPILL, /* register spilled into stack */ STACK_MISC, /* BPF program wrote some data into this slot */ STACK_ZERO, /* BPF program wrote constant zero */ + /* A dynptr is stored in this stack slot. The type of dynptr + * is stored in bpf_stack_state->spilled_ptr.dynptr.type + */ + STACK_DYNPTR, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) +#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) struct bpf_stack_state { struct bpf_reg_state spilled_ptr; @@ -523,8 +543,7 @@ int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type, - bool is_release_func); + enum bpf_arg_type arg_type); int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, diff --git a/include/linux/btf.h b/include/linux/btf.h index 36bc09b8e890..2611cea2c2b6 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -17,6 +17,7 @@ enum btf_kfunc_type { BTF_KFUNC_TYPE_ACQUIRE, BTF_KFUNC_TYPE_RELEASE, BTF_KFUNC_TYPE_RET_NULL, + BTF_KFUNC_TYPE_KPTR_ACQUIRE, BTF_KFUNC_TYPE_MAX, }; @@ -35,11 +36,19 @@ struct btf_kfunc_id_set { struct btf_id_set *acquire_set; struct btf_id_set *release_set; struct btf_id_set *ret_null_set; + struct btf_id_set *kptr_acquire_set; }; struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; }; }; +struct btf_id_dtor_kfunc { + u32 btf_id; + u32 kfunc_btf_id; +}; + +typedef void (*btf_dtor_kfunc_t)(void *); + extern const struct file_operations btf_fops; void btf_get(struct btf *btf); @@ -123,6 +132,8 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); +struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, + const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, @@ -344,6 +355,9 @@ bool btf_kfunc_id_set_contains(const struct btf *btf, enum btf_kfunc_type type, u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); +s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); +int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, + struct module *owner); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -367,6 +381,15 @@ static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, { return 0; } +static inline s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) +{ + return -ENOENT; +} +static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, + u32 add_cnt, struct module *owner) +{ + return 0; +} #endif #endif diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index bc5d9cc34e4c..335a19092368 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -178,7 +178,8 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) enum { #define BTF_SOCK_TYPE(name, str) name, diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c2ea47f30046..e22dc03c850e 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -17,7 +17,6 @@ #include <linux/can.h> #include <linux/can/bittiming.h> #include <linux/can/error.h> -#include <linux/can/led.h> #include <linux/can/length.h> #include <linux/can/netlink.h> #include <linux/can/skb.h> @@ -85,15 +84,6 @@ struct can_priv { int (*do_get_berr_counter)(const struct net_device *dev, struct can_berr_counter *bec); int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); - -#ifdef CONFIG_CAN_LEDS - struct led_trigger *tx_led_trig; - char tx_led_trig_name[CAN_LED_NAME_SZ]; - struct led_trigger *rx_led_trig; - char rx_led_trig_name[CAN_LED_NAME_SZ]; - struct led_trigger *rxtx_led_trig; - char rxtx_led_trig_name[CAN_LED_NAME_SZ]; -#endif }; static inline bool can_tdc_is_enabled(const struct can_priv *priv) diff --git a/include/linux/can/led.h b/include/linux/can/led.h deleted file mode 100644 index 7c3cfd798c56..000000000000 --- a/include/linux/can/led.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2012, Fabio Baltieri <fabio.baltieri@gmail.com> - */ - -#ifndef _CAN_LED_H -#define _CAN_LED_H - -#include <linux/if.h> -#include <linux/leds.h> -#include <linux/netdevice.h> - -enum can_led_event { - CAN_LED_EVENT_OPEN, - CAN_LED_EVENT_STOP, - CAN_LED_EVENT_TX, - CAN_LED_EVENT_RX, -}; - -#ifdef CONFIG_CAN_LEDS - -/* keep space for interface name + "-tx"/"-rx"/"-rxtx" - * suffix and null terminator - */ -#define CAN_LED_NAME_SZ (IFNAMSIZ + 6) - -void can_led_event(struct net_device *netdev, enum can_led_event event); -void devm_can_led_init(struct net_device *netdev); -int __init can_led_notifier_init(void); -void __exit can_led_notifier_exit(void); - -#else - -static inline void can_led_event(struct net_device *netdev, - enum can_led_event event) -{ -} -static inline void devm_can_led_init(struct net_device *netdev) -{ -} -static inline int can_led_notifier_init(void) -{ - return 0; -} -static inline void can_led_notifier_exit(void) -{ -} - -#endif - -#endif /* !_CAN_LED_H */ diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index c11477620403..c205c51d79c9 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -42,8 +42,8 @@ int can_rx_offload_add_manual(struct net_device *dev, int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); -int can_rx_offload_queue_sorted(struct can_rx_offload *offload, - struct sk_buff *skb, u32 timestamp); +int can_rx_offload_queue_timestamp(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int idx, u32 timestamp, unsigned int *frame_len_ptr); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4af58459a1e7..99dc7bfbcd3c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -71,11 +71,13 @@ enum { * struct kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. * @tcp_data_split: Scatter packet headers and data to separate buffers + * @tx_push: The flag of tx push mode * @cqe_size: Size of TX/RX completion queue event */ struct kernel_ethtool_ringparam { u32 rx_buf_len; u8 tcp_data_split; + u8 tx_push; u32 cqe_size; }; @@ -83,10 +85,12 @@ struct kernel_ethtool_ringparam { * enum ethtool_supported_ring_param - indicator caps for setting ring params * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size + * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), ETHTOOL_RING_USE_CQE_SIZE = BIT(1), + ETHTOOL_RING_USE_TX_PUSH = BIT(2), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 295637a66c46..3b401fa0f374 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -52,6 +52,22 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #define __underlying_strncpy __builtin_strncpy #endif +/** + * unsafe_memcpy - memcpy implementation with no FORTIFY bounds checking + * + * @dst: Destination memory address to write to + * @src: Source memory address to read from + * @bytes: How many bytes to write to @dst from @src + * @justification: Free-form text or comment describing why the use is needed + * + * This should be used for corner cases where the compiler cannot do the + * right thing, or during transitions between APIs, etc. It should be used + * very rarely, and includes a place for justification detailing where bounds + * checking has happened, and why existing solutions cannot be employed. + */ +#define unsafe_memcpy(dst, src, bytes, justification) \ + __underlying_memcpy(dst, src, bytes) + /* * Clang's use of __builtin_object_size() within inlines needs hinting via * __pass_object_size(). The preference is to only ever use type 1 (member diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4816b7e11047..820500430eae 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -303,6 +303,8 @@ int unregister_ftrace_function(struct ftrace_ops *ops); extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct ftrace_regs *fregs); + +int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs); #else /* !CONFIG_FUNCTION_TRACER */ /* * (un)register_ftrace_function must be a macro since the ops parameter @@ -313,6 +315,10 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1, static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } +static inline int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_FUNCTION_TRACER */ struct ftrace_func_entry { diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 9055cb380ee2..db0f4fcfdaf4 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -79,8 +79,9 @@ extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); extern void icmpv6_cleanup(void); -extern void icmpv6_param_prob(struct sk_buff *skb, - u8 code, int pos); +extern void icmpv6_param_prob_reason(struct sk_buff *skb, + u8 code, int pos, + enum skb_drop_reason reason); struct flowi6; struct in6_addr; @@ -91,6 +92,12 @@ extern void icmpv6_flow_init(struct sock *sk, const struct in6_addr *daddr, int oif); +static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +{ + icmpv6_param_prob_reason(skb, code, pos, + SKB_DROP_REASON_NOT_SPECIFIED); +} + static inline bool icmpv6_is_err(int type) { switch (type) { diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 95c831162212..f1f9412b6ac6 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -134,18 +134,46 @@ enum { * a successful transmission. */ IEEE802154_SUCCESS = 0x0, - + /* The requested operation failed. */ + IEEE802154_MAC_ERROR = 0x1, + /* The requested operation has been cancelled. */ + IEEE802154_CANCELLED = 0x2, + /* + * Device is ready to poll the coordinator for data in a non beacon + * enabled PAN. + */ + IEEE802154_READY_FOR_POLL = 0x3, + /* Wrong frame counter. */ + IEEE802154_COUNTER_ERROR = 0xdb, + /* + * The frame does not conforms to the incoming key usage policy checking + * procedure. + */ + IEEE802154_IMPROPER_KEY_TYPE = 0xdc, + /* + * The frame does not conforms to the incoming security level usage + * policy checking procedure. + */ + IEEE802154_IMPROPER_SECURITY_LEVEL = 0xdd, + /* Secured frame received with an empty Frame Version field. */ + IEEE802154_UNSUPPORTED_LEGACY = 0xde, + /* + * A secured frame is received or must be sent but security is not + * enabled in the device. Or, the Auxiliary Security Header has security + * level of zero in it. + */ + IEEE802154_UNSUPPORTED_SECURITY = 0xdf, /* The beacon was lost following a synchronization request. */ - IEEE802154_BEACON_LOSS = 0xe0, + IEEE802154_BEACON_LOST = 0xe0, /* * A transmission could not take place due to activity on the * channel, i.e., the CSMA-CA mechanism has failed. */ - IEEE802154_CHNL_ACCESS_FAIL = 0xe1, + IEEE802154_CHANNEL_ACCESS_FAILURE = 0xe1, /* The GTS request has been denied by the PAN coordinator. */ - IEEE802154_DENINED = 0xe2, + IEEE802154_DENIED = 0xe2, /* The attempt to disable the transceiver has failed. */ - IEEE802154_DISABLE_TRX_FAIL = 0xe3, + IEEE802154_DISABLE_TRX_FAILURE = 0xe3, /* * The received frame induces a failed security check according to * the security suite. @@ -185,9 +213,9 @@ enum { * A PAN identifier conflict has been detected and communicated to the * PAN coordinator. */ - IEEE802154_PANID_CONFLICT = 0xee, + IEEE802154_PAN_ID_CONFLICT = 0xee, /* A coordinator realignment command has been received. */ - IEEE802154_REALIGMENT = 0xef, + IEEE802154_REALIGNMENT = 0xef, /* The transaction has expired and its information discarded. */ IEEE802154_TRANSACTION_EXPIRED = 0xf0, /* There is no capacity to store the transaction. */ @@ -203,12 +231,49 @@ enum { * A SET/GET request was issued with the identifier of a PIB attribute * that is not supported. */ - IEEE802154_UNSUPPORTED_ATTR = 0xf4, + IEEE802154_UNSUPPORTED_ATTRIBUTE = 0xf4, + /* Missing source or destination address or address mode. */ + IEEE802154_INVALID_ADDRESS = 0xf5, + /* + * MLME asked to turn the receiver on, but the on time duration is too + * big compared to the macBeaconOrder. + */ + IEEE802154_ON_TIME_TOO_LONG = 0xf6, + /* + * MLME asaked to turn the receiver on, but the request was delayed for + * too long before getting processed. + */ + IEEE802154_PAST_TIME = 0xf7, + /* + * The StartTime parameter is nonzero, and the MLME is not currently + * tracking the beacon of the coordinator through which it is + * associated. + */ + IEEE802154_TRACKING_OFF = 0xf8, + /* + * The index inside the hierarchical values in PIBAttribute is out of + * range. + */ + IEEE802154_INVALID_INDEX = 0xf9, + /* + * The number of PAN descriptors discovered during a scan has been + * reached. + */ + IEEE802154_LIMIT_REACHED = 0xfa, + /* + * The PIBAttribute parameter specifies an attribute that is a read-only + * attribute. + */ + IEEE802154_READ_ONLY = 0xfb, /* * A request to perform a scan operation failed because the MLME was * in the process of performing a previously initiated scan operation. */ IEEE802154_SCAN_IN_PROGRESS = 0xfc, + /* The outgoing superframe overlaps the incoming superframe. */ + IEEE802154_SUPERFRAME_OVERLAP = 0xfd, + /* Any other error situation. */ + IEEE802154_SYSTEM_ERROR = 0xff, }; /* frame control handling */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 16870f86c74d..38c8203d52cb 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -61,6 +61,7 @@ struct ipv6_devconf { __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; __s32 drop_unsolicited_na; + __s32 accept_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; @@ -144,6 +145,7 @@ struct inet6_skb_parm { #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 +#define IP6SKB_FAKEJUMBO 512 }; #if defined(CONFIG_NET_L3_MASTER_DEV) @@ -339,8 +341,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return (struct raw6_sock *)sk; } -#define __ipv6_only_sock(sk) (sk->sk_ipv6only) -#define ipv6_only_sock(sk) (__ipv6_only_sock(sk)) +#define ipv6_only_sock(sk) (sk->sk_ipv6only) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ inet6_sk(sk)->rxopt.bits.rxinfo) @@ -357,7 +358,6 @@ static inline int inet_v6_ipv6only(const struct sock *sk) return ipv6_only_sock(sk); } #else -#define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 #define ipv6_sk_rxinfo(sk) 0 diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index ce1bd2fbf23e..ad39636e0c3f 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -65,11 +65,11 @@ static inline void *dereference_symbol_descriptor(void *ptr) return ptr; } +#ifdef CONFIG_KALLSYMS int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); -#ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); @@ -163,6 +163,11 @@ static inline bool kallsyms_show_value(const struct cred *cred) return false; } +static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, + unsigned long), void *data) +{ + return -EOPNOTSUPP; +} #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) diff --git a/include/linux/list.h b/include/linux/list.h index dd6c2041d09c..57e8b559cdf6 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -564,6 +564,19 @@ static inline void list_splice_tail_init(struct list_head *list, list_entry((pos)->member.next, typeof(*(pos)), member) /** + * list_next_entry_circular - get the next element in list + * @pos: the type * to cursor. + * @head: the list head to take the element from. + * @member: the name of the list_head within the struct. + * + * Wraparound if pos is the last element (return the first element). + * Note, that list is expected to be not empty. + */ +#define list_next_entry_circular(pos, head, member) \ + (list_is_last(&(pos)->member, head) ? \ + list_first_entry(head, typeof(*(pos)), member) : list_next_entry(pos, member)) + +/** * list_prev_entry - get the prev element in list * @pos: the type * to cursor * @member: the name of the list_head within the struct. @@ -572,6 +585,19 @@ static inline void list_splice_tail_init(struct list_head *list, list_entry((pos)->member.prev, typeof(*(pos)), member) /** + * list_prev_entry_circular - get the prev element in list + * @pos: the type * to cursor. + * @head: the list head to take the element from. + * @member: the name of the list_head within the struct. + * + * Wraparound if pos is the first element (return the last element). + * Note, that list is expected to be not empty. + */ +#define list_prev_entry_circular(pos, head, member) \ + (list_is_first(&(pos)->member, head) ? \ + list_last_entry(head, typeof(*(pos)), member) : list_prev_entry(pos, member)) + +/** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. @@ -580,6 +606,16 @@ static inline void list_splice_tail_init(struct list_head *list, for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** + * list_for_each_rcu - Iterate over a list in an RCU-safe fashion + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + !list_is_head(pos, (head)); \ + pos = rcu_dereference(pos->next)) + +/** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. diff --git a/include/linux/mdio.h b/include/linux/mdio.h index ecac96d52e01..00177567cfef 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -340,6 +340,76 @@ static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, advertising, lpa & MDIO_AN_10GBT_STAT_LP10G); } +/** + * mii_t1_adv_l_mod_linkmode_t + * @advertising: target the linkmode advertisement settings + * @lpa: value of the BASE-T1 Autonegotiation Advertisement [15:0] Register + * + * A small helper function that translates BASE-T1 Autonegotiation + * Advertisement [15:0] Register bits to linkmode advertisement settings. + * Other bits in advertising aren't changed. + */ +static inline void mii_t1_adv_l_mod_linkmode_t(unsigned long *advertising, u32 lpa) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising, + lpa & MDIO_AN_T1_ADV_L_PAUSE_CAP); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising, + lpa & MDIO_AN_T1_ADV_L_PAUSE_ASYM); +} + +/** + * mii_t1_adv_m_mod_linkmode_t + * @advertising: target the linkmode advertisement settings + * @lpa: value of the BASE-T1 Autonegotiation Advertisement [31:16] Register + * + * A small helper function that translates BASE-T1 Autonegotiation + * Advertisement [31:16] Register bits to linkmode advertisement settings. + * Other bits in advertising aren't changed. + */ +static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 lpa) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_B10L); +} + +/** + * linkmode_adv_to_mii_t1_adv_l_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * BASE-T1 Autonegotiation Advertisement [15:0] Register. + */ +static inline u32 linkmode_adv_to_mii_t1_adv_l_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising)) + result |= MDIO_AN_T1_ADV_L_PAUSE_CAP; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising)) + result |= MDIO_AN_T1_ADV_L_PAUSE_ASYM; + + return result; +} + +/** + * linkmode_adv_to_mii_t1_adv_m_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * BASE-T1 Autonegotiation Advertisement [31:16] Register. + */ +static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_B10L; + + return result; +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, diff --git a/include/linux/mfd/idt8a340_reg.h b/include/linux/mfd/idt8a340_reg.h index a18c1539a152..0c706085c205 100644 --- a/include/linux/mfd/idt8a340_reg.h +++ b/include/linux/mfd/idt8a340_reg.h @@ -407,7 +407,7 @@ #define TOD_READ_PRIMARY_0 0xcc40 #define TOD_READ_PRIMARY_0_V520 0xcc50 /* 8-bit subns, 32-bit ns, 48-bit seconds */ -#define TOD_READ_PRIMARY 0x0000 +#define TOD_READ_PRIMARY_BASE 0x0000 /* Counter increments after TOD write is completed */ #define TOD_READ_PRIMARY_COUNTER 0x000b /* Read trigger configuration */ @@ -424,6 +424,16 @@ #define TOD_READ_SECONDARY_0 0xcc90 #define TOD_READ_SECONDARY_0_V520 0xcca0 +/* 8-bit subns, 32-bit ns, 48-bit seconds */ +#define TOD_READ_SECONDARY_BASE 0x0000 +/* Counter increments after TOD write is completed */ +#define TOD_READ_SECONDARY_COUNTER 0x000b +/* Read trigger configuration */ +#define TOD_READ_SECONDARY_SEL_CFG_0 0x000c +/* Read trigger selection */ +#define TOD_READ_SECONDARY_CMD 0x000e +#define TOD_READ_SECONDARY_CMD_V520 0x000f + #define TOD_READ_SECONDARY_1 0xcca0 #define TOD_READ_SECONDARY_1_V520 0xccb0 #define TOD_READ_SECONDARY_2 0xccb0 diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h deleted file mode 100644 index dacf69516002..000000000000 --- a/include/linux/mlx5/accel.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_ACCEL_H__ -#define __MLX5_ACCEL_H__ - -#include <linux/mlx5/driver.h> - -enum mlx5_accel_esp_aes_gcm_keymat_iv_algo { - MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ, -}; - -enum mlx5_accel_esp_flags { - MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */ - MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0, - MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1, - MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2, -}; - -enum mlx5_accel_esp_action { - MLX5_ACCEL_ESP_ACTION_DECRYPT, - MLX5_ACCEL_ESP_ACTION_ENCRYPT, -}; - -enum mlx5_accel_esp_keymats { - MLX5_ACCEL_ESP_KEYMAT_AES_NONE, - MLX5_ACCEL_ESP_KEYMAT_AES_GCM, -}; - -enum mlx5_accel_esp_replay { - MLX5_ACCEL_ESP_REPLAY_NONE, - MLX5_ACCEL_ESP_REPLAY_BMP, -}; - -struct aes_gcm_keymat { - u64 seq_iv; - enum mlx5_accel_esp_aes_gcm_keymat_iv_algo iv_algo; - - u32 salt; - u32 icv_len; - - u32 key_len; - u32 aes_key[256 / 32]; -}; - -struct mlx5_accel_esp_xfrm_attrs { - enum mlx5_accel_esp_action action; - u32 esn; - __be32 spi; - u32 seq; - u32 tfc_pad; - u32 flags; - u32 sa_handle; - enum mlx5_accel_esp_replay replay_type; - union { - struct { - u32 size; - - } bmp; - } replay; - enum mlx5_accel_esp_keymats keymat_type; - union { - struct aes_gcm_keymat aes_gcm; - } keymat; - - union { - __be32 a4; - __be32 a6[4]; - } saddr; - - union { - __be32 a4; - __be32 a6[4]; - } daddr; - - u8 is_ipv6; -}; - -struct mlx5_accel_esp_xfrm { - struct mlx5_core_dev *mdev; - struct mlx5_accel_esp_xfrm_attrs attrs; -}; - -enum { - MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0, -}; - -enum mlx5_accel_ipsec_cap { - MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0, - MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1, - MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2, - MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3, - MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4, - MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5, - MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 6, - MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7, -}; - -#ifdef CONFIG_MLX5_ACCEL - -u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); - -struct mlx5_accel_esp_xfrm * -mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags); -void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm); -int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs); - -#else - -static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } - -static inline struct mlx5_accel_esp_xfrm * -mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) { return ERR_PTR(-EOPNOTSUPP); } -static inline void -mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {} -static inline int -mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; } - -#endif /* CONFIG_MLX5_ACCEL */ -#endif /* __MLX5_ACCEL_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9424503eb8d3..b064bc278f52 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -84,7 +84,7 @@ enum mlx5_sqp_t { }; enum { - MLX5_MAX_PORTS = 2, + MLX5_MAX_PORTS = 4, }; enum { @@ -272,6 +272,8 @@ struct mlx5_cmd_stats { u32 last_failed_errno; /* last bad status returned by FW */ u8 last_failed_mbox_status; + /* last command failed syndrome returned by FW */ + u32 last_failed_syndrome; struct dentry *root; /* protect command average calculations */ spinlock_t lock; @@ -558,6 +560,7 @@ struct mlx5_debugfs_entries { struct dentry *cq_debugfs; struct dentry *cmdif_debugfs; struct dentry *pages_debugfs; + struct dentry *lag_debugfs; }; struct mlx5_ft_pool; @@ -632,6 +635,7 @@ enum mlx5_device_state { enum mlx5_interface_state { MLX5_INTERFACE_STATE_UP = BIT(0), + MLX5_BREAK_FW_WAIT = BIT(1), }; enum mlx5_pci_status { @@ -778,9 +782,6 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif -#ifdef CONFIG_MLX5_ACCEL - const struct mlx5_accel_ipsec_ops *ipsec_ops; -#endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; @@ -1052,9 +1053,14 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); -int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node); + +static inline int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + return mlx5_db_alloc_node(dev, db, dev->priv.numa_node); +} + void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); const char *mlx5_command_str(int command); @@ -1144,6 +1150,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int num_counters, size_t *offsets); struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev); +u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e3bfed68b08a..8135713b0d2d 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,6 +40,18 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +enum mlx5_flow_destination_type { + MLX5_FLOW_DESTINATION_TYPE_NONE, + MLX5_FLOW_DESTINATION_TYPE_VPORT, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + MLX5_FLOW_DESTINATION_TYPE_TIR, + MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER, + MLX5_FLOW_DESTINATION_TYPE_UPLINK, + MLX5_FLOW_DESTINATION_TYPE_PORT, + MLX5_FLOW_DESTINATION_TYPE_COUNTER, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM, +}; + enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7d2d0ba82144..78b3d3465dd7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1359,7 +1359,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vhca_resource_manager[0x1]; u8 hca_cap_2[0x1]; - u8 reserved_at_21[0x1]; + u8 create_lag_when_not_master_up[0x1]; u8 dtor[0x1]; u8 event_on_vhca_state_teardown_request[0x1]; u8 event_on_vhca_state_in_use[0x1]; @@ -1806,16 +1806,12 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x740]; }; -enum mlx5_flow_destination_type { - MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, - MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, - MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, - MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, - MLX5_FLOW_DESTINATION_TYPE_UPLINK = 0x8, - - MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, - MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, - MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101, +enum mlx5_ifc_flow_destination_type { + MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT = 0x0, + MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, + MLX5_IFC_FLOW_DESTINATION_TYPE_TIR = 0x2, + MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, + MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK = 0x8, }; enum mlx5_flow_table_miss_action { @@ -10820,7 +10816,8 @@ struct mlx5_ifc_dcbx_param_bits { enum { MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY = 0, - MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT, + MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT = 1, + MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW = 2, }; struct mlx5_ifc_lagc_bits { @@ -11383,8 +11380,6 @@ enum { enum { MLX5_IPSEC_OBJECT_ICV_LEN_16B, - MLX5_IPSEC_OBJECT_ICV_LEN_12B, - MLX5_IPSEC_OBJECT_ICV_LEN_8B, }; struct mlx5_ifc_ipsec_obj_bits { diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 07d77323f78a..45c7c0d67635 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -54,7 +54,6 @@ enum { enum { MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2, - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS = 0x3, }; struct mlx5_ifc_fpga_shell_caps_bits { @@ -387,89 +386,6 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_tls_extended_cap_bits { - u8 aes_gcm_128[0x1]; - u8 aes_gcm_256[0x1]; - u8 reserved_at_2[0x1e]; - u8 reserved_at_20[0x20]; - u8 context_capacity_total[0x20]; - u8 context_capacity_rx[0x20]; - u8 context_capacity_tx[0x20]; - u8 reserved_at_a0[0x10]; - u8 tls_counter_size[0x10]; - u8 tls_counters_addr_low[0x20]; - u8 tls_counters_addr_high[0x20]; - u8 rx[0x1]; - u8 tx[0x1]; - u8 tls_v12[0x1]; - u8 tls_v13[0x1]; - u8 lro[0x1]; - u8 ipv6[0x1]; - u8 reserved_at_106[0x1a]; -}; - -struct mlx5_ifc_ipsec_extended_cap_bits { - u8 encapsulation[0x20]; - - u8 reserved_0[0x12]; - u8 v2_command[0x1]; - u8 udp_encap[0x1]; - u8 rx_no_trailer[0x1]; - u8 ipv4_fragment[0x1]; - u8 ipv6[0x1]; - u8 esn[0x1]; - u8 lso[0x1]; - u8 transport_and_tunnel_mode[0x1]; - u8 tunnel_mode[0x1]; - u8 transport_mode[0x1]; - u8 ah_esp[0x1]; - u8 esp[0x1]; - u8 ah[0x1]; - u8 ipv4_options[0x1]; - - u8 auth_alg[0x20]; - - u8 enc_alg[0x20]; - - u8 sa_cap[0x20]; - - u8 reserved_1[0x10]; - u8 number_of_ipsec_counters[0x10]; - - u8 ipsec_counters_addr_low[0x20]; - u8 ipsec_counters_addr_high[0x20]; -}; - -struct mlx5_ifc_ipsec_counters_bits { - u8 dec_in_packets[0x40]; - - u8 dec_out_packets[0x40]; - - u8 dec_bypass_packets[0x40]; - - u8 enc_in_packets[0x40]; - - u8 enc_out_packets[0x40]; - - u8 enc_bypass_packets[0x40]; - - u8 drop_dec_packets[0x40]; - - u8 failed_auth_dec_packets[0x40]; - - u8 drop_enc_packets[0x40]; - - u8 success_add_sa[0x40]; - - u8 fail_add_sa[0x40]; - - u8 success_delete_sa[0x40]; - - u8 fail_delete_sa[0x40]; - - u8 dropped_cmd[0x40]; -}; - enum { MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RETRY_COUNTER_EXPIRED = 0x1, MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RNR_EXPIRED = 0x2, @@ -486,131 +402,4 @@ struct mlx5_ifc_fpga_qp_error_event_bits { u8 reserved_at_c0[0x8]; u8 fpga_qpn[0x18]; }; -enum mlx5_ifc_fpga_ipsec_response_syndrome { - MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0, - MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, - MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2, - MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3, -}; - -struct mlx5_ifc_fpga_ipsec_cmd_resp { - __be32 syndrome; - union { - __be32 sw_sa_handle; - __be32 flags; - }; - u8 reserved[24]; -} __packed; - -enum mlx5_ifc_fpga_ipsec_cmd_opcode { - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0, - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1, - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2, - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3, - MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4, - MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5, -}; - -enum mlx5_ifc_fpga_ipsec_cap { - MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0), -}; - -struct mlx5_ifc_fpga_ipsec_cmd_cap { - __be32 cmd; - __be32 flags; - u8 reserved[24]; -} __packed; - -enum mlx5_ifc_fpga_ipsec_sa_flags { - MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0), - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1), - MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2), - MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3), - MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4), - MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5), - MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6), - MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7), -}; - -enum mlx5_ifc_fpga_ipsec_sa_enc_mode { - MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0, - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1, - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3, -}; - -struct mlx5_ifc_fpga_ipsec_sa_v1 { - __be32 cmd; - u8 key_enc[32]; - u8 key_auth[32]; - __be32 sip[4]; - __be32 dip[4]; - union { - struct { - __be32 reserved; - u8 salt_iv[8]; - __be32 salt; - } __packed gcm; - struct { - u8 salt[16]; - } __packed cbc; - }; - __be32 spi; - __be32 sw_sa_handle; - __be16 tfclen; - u8 enc_mode; - u8 reserved1[2]; - u8 flags; - u8 reserved2[2]; -}; - -struct mlx5_ifc_fpga_ipsec_sa { - struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1; - __be16 udp_sp; - __be16 udp_dp; - u8 reserved1[4]; - __be32 esn; - __be16 vid; /* only 12 bits, rest is reserved */ - __be16 reserved2; -} __packed; - -enum fpga_tls_cmds { - CMD_SETUP_STREAM = 0x1001, - CMD_TEARDOWN_STREAM = 0x1002, - CMD_RESYNC_RX = 0x1003, -}; - -#define MLX5_TLS_1_2 (0) - -#define MLX5_TLS_ALG_AES_GCM_128 (0) -#define MLX5_TLS_ALG_AES_GCM_256 (1) - -struct mlx5_ifc_tls_cmd_bits { - u8 command_type[0x20]; - u8 ipv6[0x1]; - u8 direction_sx[0x1]; - u8 tls_version[0x2]; - u8 reserved[0x1c]; - u8 swid[0x20]; - u8 src_port[0x10]; - u8 dst_port[0x10]; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; - u8 tls_rcd_sn[0x40]; - u8 tcp_sn[0x20]; - u8 tls_implicit_iv[0x20]; - u8 tls_xor_iv[0x40]; - u8 encryption_key[0x100]; - u8 alg[4]; - u8 reserved2[0x1c]; - u8 reserved3[0x4a0]; -}; - -struct mlx5_ifc_tls_resp_bits { - u8 syndrome[0x20]; - u8 stream_id[0x20]; - u8 reserved[0x40]; -}; - -#define MLX5_TLS_COMMAND_SIZE (0x100) - #endif /* MLX5_IFC_FPGA_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 28a928b0684b..e96ee1e348cb 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -141,7 +141,7 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; -#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ MLX5_GET(reg, out, field)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f736c020cde2..f615a66c89e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -50,6 +50,7 @@ #include <linux/hashtable.h> #include <linux/rbtree.h> #include <net/net_trackers.h> +#include <net/net_debug.h> struct netpoll_info; struct device; @@ -59,7 +60,8 @@ struct dsa_port; struct ip_tunnel_parm; struct macsec_context; struct macsec_ops; - +struct netdev_name_node; +struct sd_flow_limit; struct sfp_bus; /* 802.11 specific */ struct wireless_dev; @@ -202,6 +204,7 @@ struct net_device_core_stats { unsigned long rx_dropped; unsigned long tx_dropped; unsigned long rx_nohandler; + unsigned long rx_otherhost_dropped; } __aligned(4 * sizeof(unsigned long)); #include <linux/cache.h> @@ -862,6 +865,7 @@ enum net_device_path_type { DEV_PATH_BRIDGE, DEV_PATH_PPPOE, DEV_PATH_DSA, + DEV_PATH_MTK_WDMA, }; struct net_device_path { @@ -887,6 +891,12 @@ struct net_device_path { int port; u16 proto; } dsa; + struct { + u8 wdma_idx; + u8 queue; + u16 wcid; + u8 bss; + } mtk_wdma; }; }; @@ -1013,16 +1023,6 @@ struct dev_ifalias { struct devlink; struct tlsdev_ops; -struct netdev_name_node { - struct hlist_node hlist; - struct list_head list; - struct net_device *dev; - const char *name; -}; - -int netdev_name_node_alt_create(struct net_device *dev, const char *name); -int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); - struct netdev_net_notifier { struct list_head list; struct notifier_block *nb; @@ -1261,6 +1261,10 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. + * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, + * u16 vid, + * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, * int *idx) @@ -1353,6 +1357,12 @@ struct netdev_net_notifier { * The caller must be under RCU read context. * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); * Get the forwarding path to reach the real device from the HW destination address + * ktime_t (*ndo_get_tstamp)(struct net_device *dev, + * const struct skb_shared_hwtstamps *hwtstamps, + * bool cycles); + * Get hardware timestamp based on normal/adjustable time or free running + * cycle counter. This function is required if physical clock supports a + * free running cycle counter. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1510,7 +1520,12 @@ struct net_device_ops { struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid); + u16 vid, struct netlink_ext_ack *extack); + int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + u16 vid, + struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, @@ -1570,6 +1585,9 @@ struct net_device_ops { struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); + ktime_t (*ndo_get_tstamp)(struct net_device *dev, + const struct skb_shared_hwtstamps *hwtstamps, + bool cycles); }; /** @@ -1909,8 +1927,10 @@ enum netdev_ml_priv_type { * @rtnl_link_ops: Rtnl_link_ops * * @gso_max_size: Maximum size of generic segmentation offload + * @tso_max_size: Device (as in HW) limit on the max TSO request size * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO + * @tso_max_segs: Device (as in HW) limit on the max TSO segment count * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device @@ -2099,6 +2119,8 @@ struct net_device { /* Protocol-specific pointers */ + struct in_device __rcu *ip_ptr; + struct inet6_dev __rcu *ip6_ptr; #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif @@ -2111,16 +2133,18 @@ struct net_device { #if IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; #endif - struct in_device __rcu *ip_ptr; #if IS_ENABLED(CONFIG_DECNET) struct dn_dev __rcu *dn_ptr; #endif - struct inet6_dev __rcu *ip6_ptr; #if IS_ENABLED(CONFIG_AX25) void *ax25_ptr; #endif +#if IS_ENABLED(CONFIG_CFG80211) struct wireless_dev *ieee80211_ptr; +#endif +#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN) struct wpan_dev *ieee802154_ptr; +#endif #if IS_ENABLED(CONFIG_MPLS_ROUTING) struct mpls_dev __rcu *mpls_ptr; #endif @@ -2141,7 +2165,11 @@ struct net_device { struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; int napi_defer_hard_irqs; -#define GRO_MAX_SIZE 65536 +#define GRO_LEGACY_MAX_SIZE 65536u +/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), + * and shinfo->gso_segs is a 16bit field. + */ +#define GRO_MAX_SIZE (8 * 65535u) unsigned int gro_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -2252,10 +2280,20 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; /* for setting kernel sock attribute on TCP connection setup */ -#define GSO_MAX_SIZE 65536 +#define GSO_MAX_SEGS 65535u +#define GSO_LEGACY_MAX_SIZE 65536u +/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), + * and shinfo->gso_segs is a 16bit field. + */ +#define GSO_MAX_SIZE (8 * GSO_MAX_SEGS) + unsigned int gso_max_size; -#define GSO_MAX_SEGS 65535 +#define TSO_LEGACY_MAX_SIZE 65536 +#define TSO_MAX_SIZE UINT_MAX + unsigned int tso_max_size; u16 gso_max_segs; +#define TSO_MAX_SEGS U16_MAX + u16 tso_max_segs; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; @@ -2491,37 +2529,53 @@ static inline void *netdev_priv(const struct net_device *dev) */ #define NAPI_POLL_WEIGHT 64 +void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight); + /** - * netif_napi_add - initialize a NAPI context - * @dev: network device - * @napi: NAPI context - * @poll: polling function - * @weight: default weight + * netif_napi_add() - initialize a NAPI context + * @dev: network device + * @napi: NAPI context + * @poll: polling function + * @weight: default weight * * netif_napi_add() must be used to initialize a NAPI context prior to calling * *any* of the other NAPI-related functions. */ -void netif_napi_add(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight); +static inline void +netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + netif_napi_add_weight(dev, napi, poll, weight); +} + +static inline void +netif_napi_add_tx_weight(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight) +{ + set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); + netif_napi_add_weight(dev, napi, poll, weight); +} + +#define netif_tx_napi_add netif_napi_add_tx_weight /** - * netif_tx_napi_add - initialize a NAPI context - * @dev: network device - * @napi: NAPI context - * @poll: polling function - * @weight: default weight + * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only + * @dev: network device + * @napi: NAPI context + * @poll: polling function * * This variant of netif_napi_add() should be used from drivers using NAPI * to exclusively poll a TX queue. * This will avoid we add it into napi_hash[], thus polluting this hash table. */ -static inline void netif_tx_napi_add(struct net_device *dev, +static inline void netif_napi_add_tx(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), - int weight) + int (*poll)(struct napi_struct *, int)) { - set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); - netif_napi_add(dev, napi, poll, weight); + netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } /** @@ -2932,10 +2986,20 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -int dev_queue_xmit(struct sk_buff *skb); -int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); +int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); +static inline int dev_queue_xmit(struct sk_buff *skb) +{ + return __dev_queue_xmit(skb, NULL); +} + +static inline int dev_queue_xmit_accel(struct sk_buff *skb, + struct net_device *sb_dev) +{ + return __dev_queue_xmit(skb, sb_dev); +} + static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { int ret; @@ -2968,7 +3032,6 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); -int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); @@ -3027,19 +3090,6 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } -#ifdef CONFIG_NET_FLOW_LIMIT -#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ -struct sd_flow_limit { - u64 count; - unsigned int num_buckets; - unsigned int history_head; - u16 history[FLOW_LIMIT_HISTORY]; - u8 buckets[]; -}; - -extern int netdev_flow_limit_table_len; -#endif /* CONFIG_NET_FLOW_LIMIT */ - /* * Incoming packets are placed on per-CPU queues */ @@ -3067,6 +3117,9 @@ struct softnet_data { struct { u16 recursion; u8 more; +#ifdef CONFIG_NET_EGRESS + u8 skip_txqueue; +#endif } xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, @@ -3084,6 +3137,12 @@ struct softnet_data { struct sk_buff_head input_pkt_queue; struct napi_struct backlog; + /* Another possibly contended cache line */ + spinlock_t defer_lock ____cacheline_aligned_in_smp; + int defer_count; + int defer_ipi_scheduled; + struct sk_buff *defer_list; + call_single_data_t defer_csd; }; static inline void input_queue_head_incr(struct softnet_data *sd) @@ -3763,7 +3822,6 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); void __dev_notify_flags(struct net_device *, unsigned int old_flags, unsigned int gchanges); -int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, @@ -3775,13 +3833,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, return __dev_change_net_namespace(dev, net, pat, 0); } int __dev_set_mtu(struct net_device *, int); -int dev_validate_mtu(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); -int dev_set_mtu_ext(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); int dev_set_mtu(struct net_device *, int); -int dev_change_tx_queue_len(struct net_device *, unsigned long); -void dev_set_group(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, @@ -3789,24 +3841,13 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); -int dev_change_carrier(struct net_device *, bool new_carrier); -int dev_get_phys_port_id(struct net_device *dev, - struct netdev_phys_item_id *ppid); -int dev_get_phys_port_name(struct net_device *dev, - char *name, size_t len); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -int dev_change_proto_down(struct net_device *dev, bool proto_down); -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); @@ -3868,6 +3909,7 @@ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) +DEV_CORE_STATS_INC(rx_otherhost_dropped) static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, @@ -3888,12 +3930,6 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, bool dev_nit_active(struct net_device *dev); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); -extern int netdev_budget; -extern unsigned int netdev_budget_usecs; - -/* Called by rtnetlink.c:rtnl_unlock() */ -void netdev_run_todo(void); - static inline void __dev_put(struct net_device *dev) { if (dev) { @@ -4010,10 +4046,7 @@ static inline void dev_replace_track(struct net_device *odev, * called netif_lowerlayer_*() because they represent the state of any * kind of lower layer not just hardware media. */ - -void linkwatch_init_dev(struct net_device *dev); void linkwatch_fire_event(struct net_device *dev); -void linkwatch_forget_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4459,9 +4492,6 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); -void dev_addr_flush(struct net_device *dev); -int dev_addr_init(struct net_device *dev); -void dev_addr_check(struct net_device *dev); /* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, const unsigned char *addr); @@ -4551,7 +4581,6 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); -void __dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); @@ -4569,11 +4598,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; -extern int netdev_tstamp_prequeue; -extern int netdev_unregister_timeout_secs; -extern int weight_p; -extern int dev_weight_rx_bias; -extern int dev_weight_tx_bias; extern int dev_rx_weight; extern int dev_tx_weight; extern int gro_normal_batch; @@ -4761,11 +4785,17 @@ static inline void netdev_rx_csum_fault(struct net_device *dev, void net_enable_timestamp(void); void net_disable_timestamp(void); -#ifdef CONFIG_PROC_FS -int __init dev_proc_init(void); -#else -#define dev_proc_init() 0 -#endif +static inline ktime_t netdev_get_tstamp(struct net_device *dev, + const struct skb_shared_hwtstamps *hwtstamps, + bool cycles) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_get_tstamp) + return ops->ndo_get_tstamp(dev, hwtstamps, cycles); + + return hwtstamps->hwtstamp; +} static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev, @@ -4802,8 +4832,6 @@ extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); -void linkwatch_run_queue(void); - static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { @@ -4889,26 +4917,10 @@ static inline bool netif_needs_gso(struct sk_buff *skb, (skb->ip_summed != CHECKSUM_UNNECESSARY))); } -static inline void netif_set_gso_max_size(struct net_device *dev, - unsigned int size) -{ - /* dev->gso_max_size is read locklessly from sk_setup_caps() */ - WRITE_ONCE(dev->gso_max_size, size); -} - -static inline void netif_set_gso_max_segs(struct net_device *dev, - unsigned int segs) -{ - /* dev->gso_max_segs is read locklessly from sk_setup_caps() */ - WRITE_ONCE(dev->gso_max_segs, segs); -} - -static inline void netif_set_gro_max_size(struct net_device *dev, - unsigned int size) -{ - /* This pairs with the READ_ONCE() in skb_gro_receive() */ - WRITE_ONCE(dev->gro_max_size, size); -} +void netif_set_tso_max_size(struct net_device *dev, unsigned int size); +void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs); +void netif_inherit_tso_max(struct net_device *to, + const struct net_device *from); static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, int pulled_hlen, u16 mac_offset, @@ -5074,81 +5086,9 @@ static inline const char *netdev_reg_state(const struct net_device *dev) return " (unknown)"; } -__printf(3, 4) __cold -void netdev_printk(const char *level, const struct net_device *dev, - const char *format, ...); -__printf(2, 3) __cold -void netdev_emerg(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_alert(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_crit(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_err(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_warn(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_notice(const struct net_device *dev, const char *format, ...); -__printf(2, 3) __cold -void netdev_info(const struct net_device *dev, const char *format, ...); - -#define netdev_level_once(level, dev, fmt, ...) \ -do { \ - static bool __section(".data.once") __print_once; \ - \ - if (!__print_once) { \ - __print_once = true; \ - netdev_printk(level, dev, fmt, ##__VA_ARGS__); \ - } \ -} while (0) - -#define netdev_emerg_once(dev, fmt, ...) \ - netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__) -#define netdev_alert_once(dev, fmt, ...) \ - netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__) -#define netdev_crit_once(dev, fmt, ...) \ - netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__) -#define netdev_err_once(dev, fmt, ...) \ - netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__) -#define netdev_warn_once(dev, fmt, ...) \ - netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__) -#define netdev_notice_once(dev, fmt, ...) \ - netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__) -#define netdev_info_once(dev, fmt, ...) \ - netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__) - #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) -#if defined(CONFIG_DYNAMIC_DEBUG) || \ - (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) -#define netdev_dbg(__dev, format, args...) \ -do { \ - dynamic_netdev_dbg(__dev, format, ##args); \ -} while (0) -#elif defined(DEBUG) -#define netdev_dbg(__dev, format, args...) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args) -#else -#define netdev_dbg(__dev, format, args...) \ -({ \ - if (0) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args); \ -}) -#endif - -#if defined(VERBOSE_DEBUG) -#define netdev_vdbg netdev_dbg -#else - -#define netdev_vdbg(dev, format, args...) \ -({ \ - if (0) \ - netdev_printk(KERN_DEBUG, dev, format, ##args); \ - 0; \ -}) -#endif - /* * netdev_WARN() acts like dev_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the @@ -5162,74 +5102,6 @@ do { \ WARN_ONCE(1, "netdevice: %s%s: " format, netdev_name(dev), \ netdev_reg_state(dev), ##args) -/* netif printk helpers, similar to netdev_printk */ - -#define netif_printk(priv, type, level, dev, fmt, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - netdev_printk(level, (dev), fmt, ##args); \ -} while (0) - -#define netif_level(level, priv, type, dev, fmt, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - netdev_##level(dev, fmt, ##args); \ -} while (0) - -#define netif_emerg(priv, type, dev, fmt, args...) \ - netif_level(emerg, priv, type, dev, fmt, ##args) -#define netif_alert(priv, type, dev, fmt, args...) \ - netif_level(alert, priv, type, dev, fmt, ##args) -#define netif_crit(priv, type, dev, fmt, args...) \ - netif_level(crit, priv, type, dev, fmt, ##args) -#define netif_err(priv, type, dev, fmt, args...) \ - netif_level(err, priv, type, dev, fmt, ##args) -#define netif_warn(priv, type, dev, fmt, args...) \ - netif_level(warn, priv, type, dev, fmt, ##args) -#define netif_notice(priv, type, dev, fmt, args...) \ - netif_level(notice, priv, type, dev, fmt, ##args) -#define netif_info(priv, type, dev, fmt, args...) \ - netif_level(info, priv, type, dev, fmt, ##args) - -#if defined(CONFIG_DYNAMIC_DEBUG) || \ - (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) -#define netif_dbg(priv, type, netdev, format, args...) \ -do { \ - if (netif_msg_##type(priv)) \ - dynamic_netdev_dbg(netdev, format, ##args); \ -} while (0) -#elif defined(DEBUG) -#define netif_dbg(priv, type, dev, format, args...) \ - netif_printk(priv, type, KERN_DEBUG, dev, format, ##args) -#else -#define netif_dbg(priv, type, dev, format, args...) \ -({ \ - if (0) \ - netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ - 0; \ -}) -#endif - -/* if @cond then downgrade to debug, else print at @level */ -#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...) \ - do { \ - if (cond) \ - netif_dbg(priv, type, netdev, fmt, ##args); \ - else \ - netif_ ## level(priv, type, netdev, fmt, ##args); \ - } while (0) - -#if defined(VERBOSE_DEBUG) -#define netif_vdbg netif_dbg -#else -#define netif_vdbg(priv, type, dev, format, args...) \ -({ \ - if (0) \ - netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \ - 0; \ -}) -#endif - /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. diff --git a/include/linux/phy.h b/include/linux/phy.h index 36ca2b5c2253..508f1149665b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -65,7 +65,7 @@ extern const int phy_basic_ports_array[3]; extern const int phy_fibre_port_array[1]; extern const int phy_all_ports_features_array[7]; extern const int phy_10_100_features_array[4]; -extern const int phy_basic_t1_features_array[2]; +extern const int phy_basic_t1_features_array[3]; extern const int phy_gbit_features_array[2]; extern const int phy_10gbit_features_array[1]; @@ -570,6 +570,7 @@ struct macsec_ops; * @autoneg_complete: Flag auto negotiation of the link has completed * @mdix: Current crossover * @mdix_ctrl: User setting of crossover + * @pma_extable: Cached value of PMA/PMD Extended Abilities Register * @interrupts: Flag interrupts have been enabled * @interface: enum phy_interface_t value * @skb: Netlink message for cable diagnostics @@ -698,6 +699,8 @@ struct phy_device { u8 mdix; u8 mdix_ctrl; + int pma_extable; + void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev); @@ -1611,11 +1614,14 @@ int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); +int genphy_c45_pma_baset1_setup_master_slave(struct phy_device *phydev); int genphy_c45_an_config_aneg(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); +int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); +int genphy_c45_baset1_read_status(struct phy_device *phydev); int genphy_c45_config_aneg(struct phy_device *phydev); int genphy_c45_loopback(struct phy_device *phydev, bool enable); int genphy_c45_pma_resume(struct phy_device *phydev); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 223781622b33..6d06896fc20d 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -160,11 +160,6 @@ struct phylink_mac_ops { * clearing unsupported speeds and duplex settings. The port modes * should not be cleared; phylink_set_port_modes() will help with this. * - * If the @state->interface mode is %PHY_INTERFACE_MODE_1000BASEX - * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode - * based on @state->advertising and/or @state->speed and update - * @state->interface accordingly. See phylink_helper_basex_speed(). - * * When @config->supported_interfaces has been set, phylink will iterate * over the supported interfaces to determine the full capability of the * MAC. The validation function must not print errors if @state->interface @@ -579,7 +574,6 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); -void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, u16 bmsr, u16 lpa); diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index fefa7790dc46..2b6ea36ad162 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -43,6 +43,9 @@ #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ #define OFF_PTP_SEQUENCE_ID 30 +/* PTP header flag fields */ +#define PTP_FLAG_TWOSTEP BIT(1) + /* Below defines should actually be removed at some point in time. */ #define IP6_HLEN 40 #define UDP_HLEN 8 diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 554454cb8693..92b44161408e 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -108,6 +108,32 @@ struct ptp_system_timestamp { * @settime64: Set the current time on the hardware clock. * parameter ts: Time value to set. * + * @getcycles64: Reads the current free running cycle counter from the hardware + * clock. + * If @getcycles64 and @getcyclesx64 are not supported, then + * @gettime64 or @gettimex64 will be used as default + * implementation. + * parameter ts: Holds the result. + * + * @getcyclesx64: Reads the current free running cycle counter from the + * hardware clock and optionally also the system clock. + * If @getcycles64 and @getcyclesx64 are not supported, then + * @gettimex64 will be used as default implementation if + * available. + * parameter ts: Holds the PHC timestamp. + * parameter sts: If not NULL, it holds a pair of timestamps + * from the system clock. The first reading is made right before + * reading the lowest bits of the PHC timestamp and the second + * reading immediately follows that. + * + * @getcrosscycles: Reads the current free running cycle counter from the + * hardware clock and system clock simultaneously. + * If @getcycles64 and @getcyclesx64 are not supported, then + * @getcrosststamp will be used as default implementation if + * available. + * parameter cts: Contains timestamp (device,system) pair, + * where system time is realtime and monotonic. + * * @enable: Request driver to enable or disable an ancillary feature. * parameter request: Desired resource to enable or disable. * parameter on: Caller passes one to enable or zero to disable. @@ -155,6 +181,11 @@ struct ptp_clock_info { int (*getcrosststamp)(struct ptp_clock_info *ptp, struct system_device_crosststamp *cts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); + int (*getcycles64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*getcyclesx64)(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts); + int (*getcrosscycles)(struct ptp_clock_info *ptp, + struct system_device_crosststamp *cts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, @@ -321,6 +352,10 @@ static inline int ptp_clock_index(struct ptp_clock *ptp) static inline int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { return -1; } +static inline int ptp_find_pin_unlocked(struct ptp_clock *ptp, + enum ptp_pin_function func, + unsigned int chan) +{ return -1; } static inline int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) { return -EOPNOTSUPP; } @@ -349,17 +384,16 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); /** * ptp_convert_timestamp() - convert timestamp to a ptp vclock time * - * @hwtstamps: skb_shared_hwtstamps structure pointer + * @hwtstamp: timestamp * @vclock_index: phc index of ptp vclock. * * Returns converted timestamp, or 0 on error. */ -ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, - int vclock_index); +ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index); #else static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { return 0; } -static inline ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, +static inline ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index) { return 0; } diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h index 16752eca5cbd..90e3045b2dcb 100644 --- a/include/linux/qed/qed_fcoe_if.h +++ b/include/linux/qed/qed_fcoe_if.h @@ -76,7 +76,7 @@ void qed_fcoe_set_pf_params(struct qed_dev *cdev, * @fill_dev_info: fills FCoE specific information * @param cdev * @param info - * @return 0 on sucesss, otherwise error value. + * @return 0 on success, otherwise error value. * @register_ops: register FCoE operations * @param cdev * @param ops - specified using qed_iscsi_cb_ops @@ -96,7 +96,7 @@ void qed_fcoe_set_pf_params(struct qed_dev *cdev, * connection. * @param p_doorbell - qed will fill the address of the * doorbell. - * return 0 on sucesss, otherwise error value. + * return 0 on success, otherwise error value. * @release_conn: release a previously acquired fcoe connection * @param cdev * @param handle - the connection handle. diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index 494cdc3cd840..fbf7973ae9ba 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -133,7 +133,7 @@ struct qed_iscsi_cb_ops { * @fill_dev_info: fills iSCSI specific information * @param cdev * @param info - * @return 0 on sucesss, otherwise error value. + * @return 0 on success, otherwise error value. * @register_ops: register iscsi operations * @param cdev * @param ops - specified using qed_iscsi_cb_ops @@ -152,7 +152,7 @@ struct qed_iscsi_cb_ops { * connection. * @param p_doorbell - qed will fill the address of the * doorbell. - * @return 0 on sucesss, otherwise error value. + * @return 0 on success, otherwise error value. * @release_conn: release a previously acquired iscsi connection * @param cdev * @param handle - the connection handle. diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h index 1d51df347560..bbfbfba51f37 100644 --- a/include/linux/qed/qed_nvmetcp_if.h +++ b/include/linux/qed/qed_nvmetcp_if.h @@ -132,7 +132,7 @@ struct nvmetcp_task_params { * connection. * @param p_doorbell - qed will fill the address of the * doorbell. - * @return 0 on sucesss, otherwise error value. + * @return 0 on success, otherwise error value. * @release_conn: release a previously acquired nvmetcp connection * @param cdev * @param handle - the connection handle. diff --git a/include/linux/qed/qed_nvmetcp_ip_services_if.h b/include/linux/qed/qed_nvmetcp_ip_services_if.h deleted file mode 100644 index 3604aee53796..000000000000 --- a/include/linux/qed/qed_nvmetcp_ip_services_if.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ -/* - * Copyright 2021 Marvell. All rights reserved. - */ - -#ifndef _QED_IP_SERVICES_IF_H -#define _QED_IP_SERVICES_IF_H - -#include <linux/types.h> -#include <net/route.h> -#include <net/ip6_route.h> -#include <linux/inetdevice.h> - -int qed_route_ipv4(struct sockaddr_storage *local_addr, - struct sockaddr_storage *remote_addr, - struct sockaddr *hardware_address, - struct net_device **ndev); -int qed_route_ipv6(struct sockaddr_storage *local_addr, - struct sockaddr_storage *remote_addr, - struct sockaddr *hardware_address, - struct net_device **ndev); -void qed_vlan_get_ndev(struct net_device **ndev, u16 *vlan_id); -struct pci_dev *qed_validate_ndev(struct net_device *ndev); -void qed_return_tcp_port(struct socket *sock); -int qed_fetch_tcp_port(struct sockaddr_storage local_ip_addr, - struct socket **sock, u16 *port); -__be16 qed_get_in_port(struct sockaddr_storage *sa); - -#endif /* _QED_IP_SERVICES_IF_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7f970b16da3a..ae2c6a3cec5d 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -100,6 +100,7 @@ void net_dec_ingress_queue(void); #ifdef CONFIG_NET_EGRESS void net_inc_egress_queue(void); void net_dec_egress_queue(void); +void netdev_xmit_skip_txqueue(bool skip); #endif void rtnetlink_init(void); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 60820ab511d2..bd023dd38ae6 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -277,6 +277,10 @@ extern struct list_head *seq_list_start_head(struct list_head *head, extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); +extern struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos); +extern struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos); +extern struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos); + /* * Helpers for iteration over hlist_head-s in seq_files */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a30cae8b0a5..da96f0d3e753 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -42,99 +42,114 @@ #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> #endif +#include <net/net_debug.h> -/* The interface for checksum offload between the stack and networking drivers +/** + * DOC: skb checksums + * + * The interface for checksum offload between the stack and networking drivers * is as follows... * - * A. IP checksum related features + * IP checksum related features + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Drivers advertise checksum offload capabilities in the features of a device. * From the stack's point of view these are capabilities offered by the driver. * A driver typically only advertises features that it is capable of offloading * to its device. * - * The checksum related features are: - * - * NETIF_F_HW_CSUM - The driver (or its device) is able to compute one - * IP (one's complement) checksum for any combination - * of protocols or protocol layering. The checksum is - * computed and set in a packet per the CHECKSUM_PARTIAL - * interface (see below). - * - * NETIF_F_IP_CSUM - Driver (device) is only able to checksum plain - * TCP or UDP packets over IPv4. These are specifically - * unencapsulated packets of the form IPv4|TCP or - * IPv4|UDP where the Protocol field in the IPv4 header - * is TCP or UDP. The IPv4 header may contain IP options. - * This feature cannot be set in features for a device - * with NETIF_F_HW_CSUM also set. This feature is being - * DEPRECATED (see below). - * - * NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain - * TCP or UDP packets over IPv6. These are specifically - * unencapsulated packets of the form IPv6|TCP or - * IPv6|UDP where the Next Header field in the IPv6 - * header is either TCP or UDP. IPv6 extension headers - * are not supported with this feature. This feature - * cannot be set in features for a device with - * NETIF_F_HW_CSUM also set. This feature is being - * DEPRECATED (see below). - * - * NETIF_F_RXCSUM - Driver (device) performs receive checksum offload. - * This flag is only used to disable the RX checksum - * feature for a device. The stack will accept receive - * checksum indication in packets received on a device - * regardless of whether NETIF_F_RXCSUM is set. - * - * B. Checksumming of received packets by device. Indication of checksum - * verification is set in skb->ip_summed. Possible values are: - * - * CHECKSUM_NONE: + * .. flat-table:: Checksum related device features + * :widths: 1 10 + * + * * - %NETIF_F_HW_CSUM + * - The driver (or its device) is able to compute one + * IP (one's complement) checksum for any combination + * of protocols or protocol layering. The checksum is + * computed and set in a packet per the CHECKSUM_PARTIAL + * interface (see below). + * + * * - %NETIF_F_IP_CSUM + * - Driver (device) is only able to checksum plain + * TCP or UDP packets over IPv4. These are specifically + * unencapsulated packets of the form IPv4|TCP or + * IPv4|UDP where the Protocol field in the IPv4 header + * is TCP or UDP. The IPv4 header may contain IP options. + * This feature cannot be set in features for a device + * with NETIF_F_HW_CSUM also set. This feature is being + * DEPRECATED (see below). + * + * * - %NETIF_F_IPV6_CSUM + * - Driver (device) is only able to checksum plain + * TCP or UDP packets over IPv6. These are specifically + * unencapsulated packets of the form IPv6|TCP or + * IPv6|UDP where the Next Header field in the IPv6 + * header is either TCP or UDP. IPv6 extension headers + * are not supported with this feature. This feature + * cannot be set in features for a device with + * NETIF_F_HW_CSUM also set. This feature is being + * DEPRECATED (see below). + * + * * - %NETIF_F_RXCSUM + * - Driver (device) performs receive checksum offload. + * This flag is only used to disable the RX checksum + * feature for a device. The stack will accept receive + * checksum indication in packets received on a device + * regardless of whether NETIF_F_RXCSUM is set. + * + * Checksumming of received packets by device + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Indication of checksum verification is set in &sk_buff.ip_summed. + * Possible values are: + * + * - %CHECKSUM_NONE * * Device did not checksum this packet e.g. due to lack of capabilities. * The packet contains full (though not verified) checksum in packet but * not in skb->csum. Thus, skb->csum is undefined in this case. * - * CHECKSUM_UNNECESSARY: + * - %CHECKSUM_UNNECESSARY * * The hardware you're dealing with doesn't calculate the full checksum - * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums - * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY - * if their checksums are okay. skb->csum is still undefined in this case + * (as in %CHECKSUM_COMPLETE), but it does parse headers and verify checksums + * for specific protocols. For such packets it will set %CHECKSUM_UNNECESSARY + * if their checksums are okay. &sk_buff.csum is still undefined in this case * though. A driver or device must never modify the checksum field in the * packet even if checksum is verified. * - * CHECKSUM_UNNECESSARY is applicable to following protocols: - * TCP: IPv6 and IPv4. - * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a + * %CHECKSUM_UNNECESSARY is applicable to following protocols: + * + * - TCP: IPv6 and IPv4. + * - UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a * zero UDP checksum for either IPv4 or IPv6, the networking stack * may perform further validation in this case. - * GRE: only if the checksum is present in the header. - * SCTP: indicates the CRC in SCTP header has been validated. - * FCOE: indicates the CRC in FC frame has been validated. + * - GRE: only if the checksum is present in the header. + * - SCTP: indicates the CRC in SCTP header has been validated. + * - FCOE: indicates the CRC in FC frame has been validated. * - * skb->csum_level indicates the number of consecutive checksums found in - * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. + * &sk_buff.csum_level indicates the number of consecutive checksums found in + * the packet minus one that have been verified as %CHECKSUM_UNNECESSARY. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet * and a device is able to verify the checksums for UDP (possibly zero), - * GRE (checksum flag is set) and TCP, skb->csum_level would be set to + * GRE (checksum flag is set) and TCP, &sk_buff.csum_level would be set to * two. If the device were only able to verify the UDP checksum and not * GRE, either because it doesn't support GRE checksum or because GRE * checksum is bad, skb->csum_level would be set to zero (TCP checksum is * not considered in this case). * - * CHECKSUM_COMPLETE: + * - %CHECKSUM_COMPLETE * * This is the most generic way. The device supplied checksum of the _whole_ - * packet as seen by netif_rx() and fills in skb->csum. This means the + * packet as seen by netif_rx() and fills in &sk_buff.csum. This means the * hardware doesn't need to parse L3/L4 headers to implement this. * * Notes: + * * - Even if device supports only some protocols, but is able to produce * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. * - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols. * - * CHECKSUM_PARTIAL: + * - %CHECKSUM_PARTIAL * * A checksum is set up to be offloaded to a device as described in the * output description for CHECKSUM_PARTIAL. This may occur on a packet @@ -146,14 +161,18 @@ * packet that are after the checksum being offloaded are not considered to * be verified. * - * C. Checksumming on transmit for non-GSO. The stack requests checksum offload - * in the skb->ip_summed for a packet. Values are: + * Checksumming on transmit for non-GSO + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * - * CHECKSUM_PARTIAL: + * The stack requests checksum offload in the &sk_buff.ip_summed for a packet. + * Values are: + * + * - %CHECKSUM_PARTIAL * * The driver is required to checksum the packet as seen by hard_start_xmit() - * from skb->csum_start up to the end, and to record/write the checksum at - * offset skb->csum_start + skb->csum_offset. A driver may verify that the + * from &sk_buff.csum_start up to the end, and to record/write the checksum at + * offset &sk_buff.csum_start + &sk_buff.csum_offset. + * A driver may verify that the * csum_start and csum_offset values are valid values given the length and * offset of the packet, but it should not attempt to validate that the * checksum refers to a legitimate transport layer checksum -- it is the @@ -165,55 +184,66 @@ * checksum calculation to the device, or call skb_checksum_help (in the case * that the device does not support offload for a particular checksum). * - * NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM are being deprecated in favor of - * NETIF_F_HW_CSUM. New devices should use NETIF_F_HW_CSUM to indicate + * %NETIF_F_IP_CSUM and %NETIF_F_IPV6_CSUM are being deprecated in favor of + * %NETIF_F_HW_CSUM. New devices should use %NETIF_F_HW_CSUM to indicate * checksum offload capability. - * skb_csum_hwoffload_help() can be called to resolve CHECKSUM_PARTIAL based + * skb_csum_hwoffload_help() can be called to resolve %CHECKSUM_PARTIAL based * on network device checksumming capabilities: if a packet does not match - * them, skb_checksum_help or skb_crc32c_help (depending on the value of - * csum_not_inet, see item D.) is called to resolve the checksum. + * them, skb_checksum_help() or skb_crc32c_help() (depending on the value of + * &sk_buff.csum_not_inet, see :ref:`crc`) + * is called to resolve the checksum. * - * CHECKSUM_NONE: + * - %CHECKSUM_NONE * * The skb was already checksummed by the protocol, or a checksum is not * required. * - * CHECKSUM_UNNECESSARY: + * - %CHECKSUM_UNNECESSARY * * This has the same meaning as CHECKSUM_NONE for checksum offload on * output. * - * CHECKSUM_COMPLETE: + * - %CHECKSUM_COMPLETE + * * Not used in checksum output. If a driver observes a packet with this value - * set in skbuff, it should treat the packet as if CHECKSUM_NONE were set. - * - * D. Non-IP checksum (CRC) offloads - * - * NETIF_F_SCTP_CRC - This feature indicates that a device is capable of - * offloading the SCTP CRC in a packet. To perform this offload the stack - * will set csum_start and csum_offset accordingly, set ip_summed to - * CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in - * the skbuff that the CHECKSUM_PARTIAL refers to CRC32c. - * A driver that supports both IP checksum offload and SCTP CRC32c offload - * must verify which offload is configured for a packet by testing the - * value of skb->csum_not_inet; skb_crc32c_csum_help is provided to resolve - * CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1. - * - * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of - * offloading the FCOE CRC in a packet. To perform this offload the stack - * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset - * accordingly. Note that there is no indication in the skbuff that the - * CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports - * both IP checksum offload and FCOE CRC offload must verify which offload - * is configured for a packet, presumably by inspecting packet headers. - * - * E. Checksumming on output with GSO. - * - * In the case of a GSO packet (skb_is_gso(skb) is true), checksum offload + * set in skbuff, it should treat the packet as if %CHECKSUM_NONE were set. + * + * .. _crc: + * + * Non-IP checksum (CRC) offloads + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * .. flat-table:: + * :widths: 1 10 + * + * * - %NETIF_F_SCTP_CRC + * - This feature indicates that a device is capable of + * offloading the SCTP CRC in a packet. To perform this offload the stack + * will set csum_start and csum_offset accordingly, set ip_summed to + * %CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication + * in the skbuff that the %CHECKSUM_PARTIAL refers to CRC32c. + * A driver that supports both IP checksum offload and SCTP CRC32c offload + * must verify which offload is configured for a packet by testing the + * value of &sk_buff.csum_not_inet; skb_crc32c_csum_help() is provided to + * resolve %CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1. + * + * * - %NETIF_F_FCOE_CRC + * - This feature indicates that a device is capable of offloading the FCOE + * CRC in a packet. To perform this offload the stack will set ip_summed + * to %CHECKSUM_PARTIAL and set csum_start and csum_offset + * accordingly. Note that there is no indication in the skbuff that the + * %CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports + * both IP checksum offload and FCOE CRC offload must verify which offload + * is configured for a packet, presumably by inspecting packet headers. + * + * Checksumming on output with GSO + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * In the case of a GSO packet (skb_is_gso() is true), checksum offload * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the - * gso_type is SKB_GSO_TCPV4 or SKB_GSO_TCPV6, TCP checksum offload as + * gso_type is %SKB_GSO_TCPV4 or %SKB_GSO_TCPV6, TCP checksum offload as * part of the GSO operation is implied. If a checksum is being offloaded - * with GSO then ip_summed is CHECKSUM_PARTIAL, and both csum_start and + * with GSO then ip_summed is %CHECKSUM_PARTIAL, and both csum_start and * csum_offset are set to refer to the outermost checksum being offloaded * (two offloaded checksums are possible with UDP encapsulation). */ @@ -381,6 +411,19 @@ enum skb_drop_reason { * the ofo queue, corresponding to * LINUX_MIB_TCPOFOMERGE */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ + SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ + SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ + SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by * BPF_PROG_TYPE_CGROUP_SKB @@ -408,11 +451,9 @@ enum skb_drop_reason { */ SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_PTYPE_ABSENT, /* not packet_type found to handle - * the skb. For an etner packet, - * this means that L3 protocol is - * not supported - */ + SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented + * or not supported + */ SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation * error */ @@ -444,9 +485,36 @@ enum skb_drop_reason { SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented * at tun/tap, e.g., check_filter() */ + SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ + SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC + * 2211, such as a broadcasts + * ICMP_TIMESTAMP + */ + SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed + * the MTU) + */ SKB_DROP_REASON_MAX, }; +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ + name == SKB_NOT_DROPPED_YET) \ + SKB_DR_SET(name, reason); \ + } while (0) + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -551,8 +619,10 @@ static inline bool skb_frag_must_loop(struct page *p) /** * struct skb_shared_hwtstamps - hardware time stamps - * @hwtstamp: hardware time stamp transformed into duration - * since arbitrary point in time + * @hwtstamp: hardware time stamp transformed into duration + * since arbitrary point in time + * @netdev_data: address/cookie of network device driver used as + * reference to actual hardware time stamp * * Software time stamps generated by ktime_get_real() are stored in * skb->tstamp. @@ -564,7 +634,10 @@ static inline bool skb_frag_must_loop(struct page *p) * &skb_shared_info. Use skb_hwtstamps() to get a pointer. */ struct skb_shared_hwtstamps { - ktime_t hwtstamp; + union { + ktime_t hwtstamp; + void *netdev_data; + }; }; /* Definitions for tx_flags in struct skb_shared_info */ @@ -578,16 +651,24 @@ enum { /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, + /* generate hardware time stamp based on cycles if supported */ + SKBTX_HW_TSTAMP_USE_CYCLES = 1 << 3, + /* generate wifi status information (where possible) */ SKBTX_WIFI_STATUS = 1 << 4, + /* determine hardware time stamp based on time or cycles */ + SKBTX_HW_TSTAMP_NETDEV = 1 << 5, + /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, }; #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP) -#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) +#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \ + SKBTX_HW_TSTAMP_USE_CYCLES | \ + SKBTX_ANY_SW_TSTAMP) /* Definitions for flags in struct skb_shared_info */ enum { @@ -647,20 +728,6 @@ struct ubuf_info { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); -struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size); -struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg); - -void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); - -void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, - bool success); - -int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len); -int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, - struct msghdr *msg, int len, - struct ubuf_info *uarg); - /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -691,16 +758,32 @@ struct skb_shared_info { skb_frag_t frags[MAX_SKB_FRAGS]; }; -/* We divide dataref into two halves. The higher 16 bits hold references - * to the payload part of skb->data. The lower 16 bits hold references to - * the entire skb->data. A clone of a headerless skb holds the length of - * the header in skb->hdr_len. - * - * All users must obey the rule that the skb->data reference count must be - * greater than or equal to the payload reference count. - * - * Holding a reference to the payload part means that the user does not - * care about modifications to the header part of skb->data. +/** + * DOC: dataref and headerless skbs + * + * Transport layers send out clones of payload skbs they hold for + * retransmissions. To allow lower layers of the stack to prepend their headers + * we split &skb_shared_info.dataref into two halves. + * The lower 16 bits count the overall number of references. + * The higher 16 bits indicate how many of the references are payload-only. + * skb_header_cloned() checks if skb is allowed to add / write the headers. + * + * The creator of the skb (e.g. TCP) marks its skb as &sk_buff.nohdr + * (via __skb_header_release()). Any clone created from marked skb will get + * &sk_buff.hdr_len populated with the available headroom. + * If there's the only clone in existence it's able to modify the headroom + * at will. The sequence of calls inside the transport layer is:: + * + * <alloc skb> + * skb_reserve() + * __skb_header_release() + * skb_clone() + * // send the clone down the stack + * + * This is not a very generic construct and it depends on the transport layers + * doing the right thing. In practice there's usually only one payload-only skb. + * Having multiple payload-only skbs with different lengths of hdr_len is not + * possible. The payload-only skbs should never leave their owner. */ #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) @@ -765,6 +848,46 @@ typedef unsigned char *sk_buff_data_t; #endif /** + * DOC: Basic sk_buff geometry + * + * struct sk_buff itself is a metadata structure and does not hold any packet + * data. All the data is held in associated buffers. + * + * &sk_buff.head points to the main "head" buffer. The head buffer is divided + * into two parts: + * + * - data buffer, containing headers and sometimes payload; + * this is the part of the skb operated on by the common helpers + * such as skb_put() or skb_pull(); + * - shared info (struct skb_shared_info) which holds an array of pointers + * to read-only data in the (page, offset, length) format. + * + * Optionally &skb_shared_info.frag_list may point to another skb. + * + * Basic diagram may look like this:: + * + * --------------- + * | sk_buff | + * --------------- + * ,--------------------------- + head + * / ,----------------- + data + * / / ,----------- + tail + * | | | , + end + * | | | | + * v v v v + * ----------------------------------------------- + * | headroom | data | tailroom | skb_shared_info | + * ----------------------------------------------- + * + [page frag] + * + [page frag] + * + [page frag] + * + [page frag] --------- + * + frag_list --> | sk_buff | + * --------- + * + */ + +/** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list @@ -851,6 +974,7 @@ typedef unsigned char *sk_buff_data_t; * delivery_time at egress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS + * @alloc_cpu: CPU which did the skb allocation. * @secmark: security marking * @mark: Generic packet mark * @reserved_tailroom: (aka @mark) number of bytes of free space available @@ -1043,6 +1167,7 @@ struct sk_buff { unsigned int sender_cpu; }; #endif + u16 alloc_cpu; #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif @@ -1284,6 +1409,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); +void skb_attempt_defer_free(struct sk_buff *skb); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); @@ -1639,6 +1765,27 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) } #endif +struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg); + +void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); + +void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, + bool success); + +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); + +static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, + struct msghdr *msg, int len) +{ + return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); +} + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg); + /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) @@ -1922,8 +2069,10 @@ static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) } /** - * __skb_header_release - release reference to header - * @skb: buffer to operate on + * __skb_header_release() - allow clones to use the headroom + * @skb: buffer to operate on + * + * See "DOC: dataref and headerless skbs". */ static inline void __skb_header_release(struct sk_buff *skb) { @@ -2752,6 +2901,7 @@ static inline bool skb_transport_header_was_set(const struct sk_buff *skb) static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->head + skb->transport_header; } @@ -3836,8 +3986,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); -struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, - int *err); +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, @@ -3886,7 +4035,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); -int skb_ensure_writable(struct sk_buff *skb, int write_len); +int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); @@ -4895,9 +5044,7 @@ static inline void skb_forward_csum(struct sk_buff *skb) */ static inline void skb_checksum_none_assert(const struct sk_buff *skb) { -#ifdef DEBUG - BUG_ON(skb->ip_summed != CHECKSUM_NONE); -#endif + DEBUG_NET_WARN_ON_ONCE(skb->ip_summed != CHECKSUM_NONE); } bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h new file mode 100644 index 000000000000..7e00cca06709 --- /dev/null +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -0,0 +1,131 @@ +#ifndef __MTK_WED_H +#define __MTK_WED_H + +#include <linux/kernel.h> +#include <linux/rcupdate.h> +#include <linux/regmap.h> +#include <linux/pci.h> + +#define MTK_WED_TX_QUEUES 2 + +struct mtk_wed_hw; +struct mtk_wdma_desc; + +struct mtk_wed_ring { + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + int size; + + u32 reg_base; + void __iomem *wpdma; +}; + +struct mtk_wed_device { +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + const struct mtk_wed_ops *ops; + struct device *dev; + struct mtk_wed_hw *hw; + bool init_done, running; + int wdma_idx; + int irq; + + struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring txfree_ring; + struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; + + struct { + int size; + void **pages; + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + } buf_ring; + + /* filled by driver: */ + struct { + struct pci_dev *pci_dev; + + u32 wpdma_phys; + + u16 token_start; + unsigned int nbuf; + + u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); + int (*offload_enable)(struct mtk_wed_device *wed); + void (*offload_disable)(struct mtk_wed_device *wed); + } wlan; +#endif +}; + +struct mtk_wed_ops { + int (*attach)(struct mtk_wed_device *dev); + int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*txfree_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); + void (*detach)(struct mtk_wed_device *dev); + + void (*stop)(struct mtk_wed_device *dev); + void (*start)(struct mtk_wed_device *dev, u32 irq_mask); + void (*reset_dma)(struct mtk_wed_device *dev); + + u32 (*reg_read)(struct mtk_wed_device *dev, u32 reg); + void (*reg_write)(struct mtk_wed_device *dev, u32 reg, u32 val); + + u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask); + void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); +}; + +extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; + +static inline int +mtk_wed_device_attach(struct mtk_wed_device *dev) +{ + int ret = -ENODEV; + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + rcu_read_lock(); + dev->ops = rcu_dereference(mtk_soc_wed_ops); + if (dev->ops) + ret = dev->ops->attach(dev); + else + rcu_read_unlock(); + + if (ret) + dev->ops = NULL; +#endif + + return ret; +} + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED +#define mtk_wed_device_active(_dev) !!(_dev)->ops +#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) +#define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->tx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_txfree_ring_setup(_dev, _regs) \ + (_dev)->ops->txfree_ring_setup(_dev, _regs) +#define mtk_wed_device_reg_read(_dev, _reg) \ + (_dev)->ops->reg_read(_dev, _reg) +#define mtk_wed_device_reg_write(_dev, _reg, _val) \ + (_dev)->ops->reg_write(_dev, _reg, _val) +#define mtk_wed_device_irq_get(_dev, _mask) \ + (_dev)->ops->irq_get(_dev, _mask) +#define mtk_wed_device_irq_set_mask(_dev, _mask) \ + (_dev)->ops->irq_set_mask(_dev, _mask) +#else +static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) +{ + return false; +} +#define mtk_wed_device_detach(_dev) do {} while (0) +#define mtk_wed_device_start(_dev, _mask) do {} while (0) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_reg_read(_dev, _reg) 0 +#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) +#define mtk_wed_device_irq_get(_dev, _mask) 0 +#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) +#endif + +#endif diff --git a/include/linux/string.h b/include/linux/string.h index b6572aeca2f5..61ec7e4f6311 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -252,6 +252,10 @@ static inline const char *kbasename(const char *path) #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) #include <linux/fortify-string.h> #endif +#ifndef unsafe_memcpy +#define unsafe_memcpy(dst, src, bytes, justification) \ + memcpy(dst, src, bytes) +#endif void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, int pad); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6353d6db69b2..80263f7cdb77 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -38,10 +38,10 @@ struct ctl_table_header; struct ctl_dir; /* Keep the same order as in fs/proc/proc_sysctl.c */ -#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0]) -#define SYSCTL_ZERO ((void *)&sysctl_vals[1]) -#define SYSCTL_ONE ((void *)&sysctl_vals[2]) -#define SYSCTL_TWO ((void *)&sysctl_vals[3]) +#define SYSCTL_ZERO ((void *)&sysctl_vals[0]) +#define SYSCTL_ONE ((void *)&sysctl_vals[1]) +#define SYSCTL_TWO ((void *)&sysctl_vals[2]) +#define SYSCTL_THREE ((void *)&sysctl_vals[3]) #define SYSCTL_FOUR ((void *)&sysctl_vals[4]) #define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5]) #define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6]) @@ -51,6 +51,7 @@ struct ctl_dir; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ #define SYSCTL_MAXOLDUID ((void *)&sysctl_vals[10]) +#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[11]) extern const int sysctl_vals[]; diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index 809bccd08455..cc42db51bbba 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -197,6 +197,7 @@ struct rndis_keepalive_c { /* IN (optionally OUT) */ /* Flags for driver_info::data */ #define RNDIS_DRIVER_DATA_POLL_STATUS 1 /* poll status before control */ +#define RNDIS_DRIVER_DATA_DST_MAC_FIXUP 2 /* device ignores configured MAC address */ extern void rndis_status(struct usbnet *dev, struct urb *urb); extern int diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 8336e86ce606..1b4d72d5e891 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -214,6 +214,7 @@ extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); +extern int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb); /* CDC and RNDIS support the same host-chosen packet filters for IN transfers */ #define DEFAULT_FILTER (USB_CDC_PACKET_TYPE_BROADCAST \ |
