From 06ae48269d1e0324d806fca30fe77112f4a4a14a Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:18 -0700 Subject: lib: reciprocal_div: implement the improved algorithm on the paper mentioned The new added "reciprocal_value_adv" implements the advanced version of the algorithm described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose ceil(log2(d)) result will be 32 which then requires u128 divide on host. The exception case could be easily handled before calling "reciprocal_value_adv". The advanced version requires more complex calculation to get the reciprocal multiplier and other control variables, but then could reduce the required emulation operations. It makes no sense to use this advanced version for host divide emulation, those extra complexities for calculating multiplier etc could completely waive our saving on emulation operations. However, it makes sense to use it for JIT divide code generation (for example eBPF JIT backends) for which we are willing to trade performance of JITed code with that of host. As shown by the following pseudo code, the required emulation operations could go down from 6 (the basic version) to 3 or 4. To use the result of "reciprocal_value_adv", suppose we want to calculate n/d, the C-style pseudo code will be the following, it could be easily changed to real code generation for other JIT targets. struct reciprocal_value_adv rvalue; u8 pre_shift, exp; // handle exception case. if (d >= (1U << 31)) { result = n >= d; return; } rvalue = reciprocal_value_adv(d, 32) exp = rvalue.exp; if (rvalue.is_wide_m && !(d & 1)) { // floor(log2(d & (2^32 -d))) pre_shift = fls(d & -d) - 1; rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); } else { pre_shift = 0; } // code generation starts. if (imm == 1U << exp) { result = n >> exp; } else if (rvalue.is_wide_m) { // pre_shift must be zero when reached here. t = (n * rvalue.m) >> 32; result = n - t; result >>= 1; result += t; result >>= rvalue.sh - 1; } else { if (pre_shift) result = n >> pre_shift; result = ((u64)result * rvalue.m) >> 32; result >>= rvalue.sh; } Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/reciprocal_div.h | 68 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h index e031e9f2f9d8..585ce89c0f33 100644 --- a/include/linux/reciprocal_div.h +++ b/include/linux/reciprocal_div.h @@ -25,6 +25,9 @@ struct reciprocal_value { u8 sh1, sh2; }; +/* "reciprocal_value" and "reciprocal_divide" together implement the basic + * version of the algorithm described in Figure 4.1 of the paper. + */ struct reciprocal_value reciprocal_value(u32 d); static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) @@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) return (t + ((a - t) >> R.sh1)) >> R.sh2; } +struct reciprocal_value_adv { + u32 m; + u8 sh, exp; + bool is_wide_m; +}; + +/* "reciprocal_value_adv" implements the advanced version of the algorithm + * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose + * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The + * exception case could be easily handled before calling "reciprocal_value_adv". + * + * The advanced version requires more complex calculation to get the reciprocal + * multiplier and other control variables, but then could reduce the required + * emulation operations. + * + * It makes no sense to use this advanced version for host divide emulation, + * those extra complexities for calculating multiplier etc could completely + * waive our saving on emulation operations. + * + * However, it makes sense to use it for JIT divide code generation for which + * we are willing to trade performance of JITed code with that of host. As shown + * by the following pseudo code, the required emulation operations could go down + * from 6 (the basic version) to 3 or 4. + * + * To use the result of "reciprocal_value_adv", suppose we want to calculate + * n/d, the pseudo C code will be: + * + * struct reciprocal_value_adv rvalue; + * u8 pre_shift, exp; + * + * // handle exception case. + * if (d >= (1U << 31)) { + * result = n >= d; + * return; + * } + * + * rvalue = reciprocal_value_adv(d, 32) + * exp = rvalue.exp; + * if (rvalue.is_wide_m && !(d & 1)) { + * // floor(log2(d & (2^32 -d))) + * pre_shift = fls(d & -d) - 1; + * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); + * } else { + * pre_shift = 0; + * } + * + * // code generation starts. + * if (imm == 1U << exp) { + * result = n >> exp; + * } else if (rvalue.is_wide_m) { + * // pre_shift must be zero when reached here. + * t = (n * rvalue.m) >> 32; + * result = n - t; + * result >>= 1; + * result += t; + * result >>= rvalue.sh - 1; + * } else { + * if (pre_shift) + * result = n >> pre_shift; + * result = ((u64)result * rvalue.m) >> 32; + * result >>= rvalue.sh; + * } + */ +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); + #endif /* _LINUX_RECIPROCAL_DIV_H */ -- cgit v1.2.3 From 6b8675897338f874c41612655a85d8e10cdb23d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:39 -0700 Subject: xdp: don't make drivers report attachment mode prog_attached of struct netdev_bpf should have been superseded by simply setting prog_id long time ago, but we kept it around to allow offloading drivers to communicate attachment mode (drv vs hw). Subsequently drivers were also allowed to report back attachment flags (prog_flags), and since nowadays only programs attached will XDP_FLAGS_HW_MODE can get offloaded, we can tell the attachment mode from the flags driver reports. Remove prog_attached member. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b683971e500d..69a664789b33 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -819,10 +819,6 @@ enum bpf_netdev_command { */ XDP_SETUP_PROG, XDP_SETUP_PROG_HW, - /* Check if a bpf program is set on the device. The callee should - * set @prog_attached to one of XDP_ATTACHED_* values, note that "true" - * is equivalent to XDP_ATTACHED_DRV. - */ XDP_QUERY_PROG, /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_VERIFIER_PREP, @@ -849,7 +845,6 @@ struct netdev_bpf { }; /* XDP_QUERY_PROG */ struct { - u8 prog_attached; u32 prog_id; /* flags with which program was installed */ u32 prog_flags; -- cgit v1.2.3 From a25717d2b604347d9af8da81deea7b08e8c94220 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jul 2018 20:36:41 -0700 Subject: xdp: support simultaneous driver and hw XDP attachment Split the query of HW-attached program from the software one. Introduce new .ndo_bpf command to query HW-attached program. This will allow drivers to install different programs in HW and SW at the same time. Netlink can now also carry multiple programs on dump (in which case mode will be set to XDP_ATTACHED_MULTI and user has to check per-attachment point attributes, IFLA_XDP_PROG_ID will not be present). We reuse IFLA_XDP_PROG_ID skb space for second mode, so rtnl_xdp_size() doesn't need to be updated. Note that the installation side is still not there, since all drivers currently reject installing more than one program at the time. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69a664789b33..2422c0e88f5c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -820,6 +820,7 @@ enum bpf_netdev_command { XDP_SETUP_PROG, XDP_SETUP_PROG_HW, XDP_QUERY_PROG, + XDP_QUERY_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_VERIFIER_PREP, BPF_OFFLOAD_TRANSLATE, @@ -843,7 +844,7 @@ struct netdev_bpf { struct bpf_prog *prog; struct netlink_ext_ack *extack; }; - /* XDP_QUERY_PROG */ + /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */ struct { u32 prog_id; /* flags with which program was installed */ @@ -3533,8 +3534,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, - struct netdev_bpf *xdp); +u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, + enum bpf_netdev_command cmd); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3