From 30ca1aa536211f5ac3de0173513a7a99a98a97f3 Mon Sep 17 00:00:00 2001 From: Dedy Lansky Date: Sun, 29 Jul 2018 14:59:16 +0300 Subject: cfg80211/mac80211: make ieee80211_send_layer2_update a public function Make ieee80211_send_layer2_update() a common function so other drivers can re-use it. Signed-off-by: Dedy Lansky Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9a850973e09a..4f57f770f602 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4732,6 +4732,17 @@ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len); +/** + * cfg80211_send_layer2_update - send layer 2 update frame + * + * @dev: network device + * @addr: STA MAC address + * + * Wireless drivers can use this function to update forwarding tables in bridge + * devices upon STA association. + */ +void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr); + /** * DOC: Regulatory enforcement infrastructure * -- cgit v1.2.3 From 21a5d4c3a45ca608477a083096cfbce76e449a0c Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Wed, 11 Jul 2018 00:12:53 +0530 Subject: mac80211: add stop/start logic for software TXQs Sometimes, it is required to stop the transmissions momentarily and resume it later; stopping the txqs becomes very critical in scenarios where the packet transmission has to be ceased completely. For example, during the hardware restart, during off channel operations, when initiating CSA(upon detecting a radar on the DFS channel), etc. The TX queue stop/start logic in mac80211 works well in stopping the TX when drivers make use of netdev queues, i.e, when Qdiscs in network layer take care of traffic scheduling. Since the devices implementing wake_tx_queue can run without Qdiscs, packets will be handed to mac80211 directly without queueing them in the netdev queues. Also, mac80211 does not invoke any of the netif_stop_*/netif_wake_* APIs if wake_tx_queue is implemented. Since the queues are not stopped in this case, transmissions can continue and this will impact negatively on the operation of the wireless device. For example, During hardware restart, we stop the netdev queues so that packets are not sent to the driver. Since ath10k implements wake_tx_queue, TX queues will not be stopped and packets might reach the hardware while it is restarting; this can make hardware unresponsive and the only possible option for recovery is to reboot the entire system. There is another problem to this, it is observed that the packets were sent on the DFS channel for a prolonged duration after radar detection impacting the channel closing time. We can still invoke netif stop/wake APIs when wake_tx_queue is implemented but this could lead to packet drops in network layer; adding stop/start logic for software TXQs in mac80211 instead makes more sense; the change proposed adds the same in mac80211. Signed-off-by: Manikanta Pubbisetty Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5790f55c241d..e248f5fe5b19 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1504,6 +1504,8 @@ enum ieee80211_vif_flags { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*). * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) + * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped, + * protected by fq->lock. */ struct ieee80211_vif { enum nl80211_iftype type; @@ -1528,6 +1530,8 @@ struct ieee80211_vif { unsigned int probe_req_reg; + bool txqs_stopped[IEEE80211_NUM_ACS]; + /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); }; -- cgit v1.2.3 From 9cf0a0b4b64ae103cf0e7dfaa72b44ecda24c0eb Mon Sep 17 00:00:00 2001 From: Alexei Avshalom Lazar Date: Mon, 13 Aug 2018 15:33:00 +0300 Subject: cfg80211: Add support for 60GHz band channels 5 and 6 The current support in the 60GHz band is for channels 1-4. Add support for channels 5 and 6. This requires enlarging ieee80211_channel.center_freq from u16 to u32. Signed-off-by: Alexei Avshalom Lazar Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- include/uapi/linux/nl80211.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4f57f770f602..46c4cbf54903 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -149,7 +149,7 @@ enum ieee80211_channel_flags { */ struct ieee80211_channel { enum nl80211_band band; - u16 center_freq; + u32 center_freq; u16 hw_value; u32 flags; int max_antenna_gain; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7acc16f34942..023989604fc6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4338,7 +4338,7 @@ enum nl80211_txrate_gi { * enum nl80211_band - Frequency band * @NL80211_BAND_2GHZ: 2.4 GHz ISM band * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) - * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 64.80 GHz) + * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz) * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace * since newer kernel versions may support more bands */ -- cgit v1.2.3 From 9c06602b1b920ed6b546632bdbbc1f400eea5242 Mon Sep 17 00:00:00 2001 From: Balaji Pothunoori Date: Thu, 19 Jul 2018 18:56:27 +0530 Subject: cfg80211: clarify frames covered by average ACK signal report Modify the API to include all ACK frames in average ACK signal strength reporting, not just ACKs for data frames. Make exposing the data conditional on implementing the extended feature flag. This is how it was really implemented in mac80211, update the code there to use the new defines and clean up some of the setting code. Keep nl80211.h source compatibility by keeping the old names. Signed-off-by: Balaji Pothunoori [rewrite commit log, change compatibility to be old=new instead of the other way around, update kernel-doc, roll in mac80211 changes, make mac80211 depend on valid bit instead of HW flag] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 023989604fc6..1766a12b231c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3050,8 +3050,7 @@ enum nl80211_sta_bss_param { * received from the station (u64, usec) * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm) - * @NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG: avg signal strength of (data) - * ACK frame (s8, dBm) + * @NL80211_STA_INFO_ACK_SIGNAL_AVG: avg signal strength of ACK frames (s8, dBm) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3091,13 +3090,17 @@ enum nl80211_sta_info { NL80211_STA_INFO_RX_DURATION, NL80211_STA_INFO_PAD, NL80211_STA_INFO_ACK_SIGNAL, - NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG, + NL80211_STA_INFO_ACK_SIGNAL_AVG, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1 }; +/* we renamed this - stay compatible */ +#define NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG NL80211_STA_INFO_ACK_SIGNAL_AVG + + /** * enum nl80211_tid_stats - per TID statistics attributes * @__NL80211_TID_STATS_INVALID: attribute number 0 is reserved @@ -5213,9 +5216,8 @@ enum nl80211_feature_flags { * "radar detected" event. * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and * receiving control port frames over nl80211 instead of the netdevice. - * @NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT: This Driver support data ack - * rssi if firmware support, this flag is to intimate about ack rssi - * support to nl80211. + * @NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT: This driver/device supports + * (average) ACK signal strength reporting. * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate * TXQs. * @NL80211_EXT_FEATURE_SCAN_RANDOM_SN: Driver/device supports randomizing the @@ -5255,7 +5257,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN, NL80211_EXT_FEATURE_DFS_OFFLOAD, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211, - NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT, + NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT, + /* we renamed this - stay compatible */ + NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT = NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT, NL80211_EXT_FEATURE_TXQS, NL80211_EXT_FEATURE_SCAN_RANDOM_SN, NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT, -- cgit v1.2.3 From b0d1beeff2a97a0cf1965ea8f1d13b8973f22582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 28 Aug 2018 14:44:25 +0200 Subject: xdp: implement convert_to_xdp_frame for MEM_TYPE_ZERO_COPY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds proper MEM_TYPE_ZERO_COPY support for convert_to_xdp_frame. Converting a MEM_TYPE_ZERO_COPY xdp_buff to an xdp_frame is done by transforming the MEM_TYPE_ZERO_COPY buffer into a MEM_TYPE_PAGE_ORDER0 frame. This is costly, and in the future it might make sense to implement a more sophisticated thread-safe alloc/free scheme for MEM_TYPE_ZERO_COPY, so that no allocation and copy is required in the fast-path. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 76b95256c266..0d5c6fb4b2e2 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -91,6 +91,8 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); + /* Convert xdp_buff to xdp_frame */ static inline struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) @@ -99,9 +101,8 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) int metasize; int headroom; - /* TODO: implement clone, copy, use "native" MEM_TYPE */ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) - return NULL; + return xdp_convert_zc_to_xdp_frame(xdp); /* Assure headroom is available for storing info */ headroom = xdp->data - xdp->data_hard_start; -- cgit v1.2.3 From dce5bd6140a436e3348f6d13a1efb6e6c5a89acd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 28 Aug 2018 14:44:26 +0200 Subject: xdp: export xdp_rxq_info_unreg_mem_model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export __xdp_rxq_info_unreg_mem_model as xdp_rxq_info_unreg_mem_model, so it can be used from netdev drivers. Also, add additional checks for the memory type. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 0d5c6fb4b2e2..0f25b3675c5c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -136,6 +136,7 @@ void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); +void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. -- cgit v1.2.3 From 902540342096af8a13351f6a22bfdd7a8e19ffd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 28 Aug 2018 14:44:27 +0200 Subject: xsk: expose xdp_umem_get_{data,dma} to drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the xdp_umem_get_{data,dma} functions to include/net/xdp_sock.h, so that the upcoming zero-copy implementation in the Ethernet drivers can utilize them. Also, supply some dummy function implementations for CONFIG_XDP_SOCKETS=n configs. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov --- include/net/xdp_sock.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 7161856bcf9c..56994ad1ab40 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -79,6 +79,16 @@ void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); void xsk_umem_consume_tx_done(struct xdp_umem *umem); + +static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) +{ + return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1)); +} + +static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) +{ + return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1)); +} #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { @@ -98,6 +108,39 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) { return false; } + +static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) +{ + return NULL; +} + +static inline void xsk_umem_discard_addr(struct xdp_umem *umem) +{ +} + +static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) +{ +} + +static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, + u32 *len) +{ + return false; +} + +static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem) +{ +} + +static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) +{ + return NULL; +} + +static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) +{ + return 0; +} #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ -- cgit v1.2.3 From 6c5c9581044dd6e0cd284ab653502fb9264f08b6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 28 Aug 2018 14:44:28 +0200 Subject: net: add napi_if_scheduled_mark_missed The function napi_if_scheduled_mark_missed is used to check if the NAPI context is scheduled, if so set NAPIF_STATE_MISSED and return true. Used by the AF_XDP zero-copy i40e Tx code implementation in order to make sure that irq affinity is honored by the napi context. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- include/linux/netdevice.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..4271f6b4e419 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -535,6 +535,32 @@ static inline void napi_synchronize(const struct napi_struct *n) barrier(); } +/** + * napi_if_scheduled_mark_missed - if napi is running, set the + * NAPIF_STATE_MISSED + * @n: NAPI context + * + * If napi is running, set the NAPIF_STATE_MISSED, and return true if + * NAPI is scheduled. + **/ +static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (val & NAPIF_STATE_DISABLE) + return true; + + if (!(val & NAPIF_STATE_SCHED)) + return false; + + new = val | NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return true; +} + enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, -- cgit v1.2.3 From 679c782de14bd48c19dd74cd1af20a2bc05dd936 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 22 Aug 2018 20:02:19 +0100 Subject: bpf/verifier: per-register parent pointers By giving each register its own liveness chain, we elide the skip_callee() logic. Instead, each register's parent is the state it inherits from; both check_func_call() and prepare_func_exit() automatically connect reg states to the correct chain since when they copy the reg state across (r1-r5 into the callee as args, and r0 out as the return value) they also copy the parent pointer. Signed-off-by: Edward Cree Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 38b04f559ad3..b42b60a83e19 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -41,6 +41,7 @@ enum bpf_reg_liveness { }; struct bpf_reg_state { + /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ @@ -59,7 +60,6 @@ struct bpf_reg_state { * came from, when one is tested for != NULL. */ u32 id; - /* Ordering of fields matters. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -76,15 +76,15 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + /* parentage chain for liveness checking */ + struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like * R1=fp-8 and R2=fp-8, but one of them points to this function stack * while another to the caller's stack. To differentiate them 'frameno' * is used which is an index in bpf_verifier_state->frame[] array * pointing to bpf_func_state. - * This field must be second to last, for states_equal() reasons. */ u32 frameno; - /* This field must be last, for states_equal() reasons. */ enum bpf_reg_liveness live; }; @@ -107,7 +107,6 @@ struct bpf_stack_state { */ struct bpf_func_state { struct bpf_reg_state regs[MAX_BPF_REG]; - struct bpf_verifier_state *parent; /* index of call instruction that called into this func */ int callsite; /* stack frame number of this function state from pov of @@ -129,7 +128,6 @@ struct bpf_func_state { struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; - struct bpf_verifier_state *parent; u32 curframe; }; -- cgit v1.2.3 From 6fce10f70461c14079d5d44aa2b25c693f4d9221 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Tue, 28 Aug 2018 18:51:58 +0200 Subject: genetlink: constify genl_err_attr() argument genl_err_attr() sets netlink_ext_ack::bad_attr which is a pointer to const struct nlattr so make the attr argument also const. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/net/genetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index decf6012a401..aa2e5888f18d 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -112,7 +112,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) static inline int genl_err_attr(struct genl_info *info, int err, - struct nlattr *attr) + const struct nlattr *attr) { info->extack->bad_attr = attr; -- cgit v1.2.3 From 9b3004953503462a4fab31b85e44ae446d48f0bd Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Tue, 28 Aug 2018 19:56:58 +0200 Subject: ethtool: drop get_settings and set_settings callbacks Since [gs]et_settings ethtool_ops callbacks have been deprecated in February 2016, all in tree NIC drivers have been converted to provide [gs]et_link_ksettings() and out of tree drivers have had enough time to do the same. Drop get_settings() and set_settings() and implement both ETHTOOL_[GS]SET and ETHTOOL_[GS]LINKSETTINGS only using [gs]et_link_ksettings(). Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/ethtool.h | 33 ++++++++++----------------------- include/uapi/linux/ethtool.h | 15 +++------------ 2 files changed, 13 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f8a2245b70ac..afd9596ce636 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -183,14 +183,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, /** * struct ethtool_ops - optional netdev operations - * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings - * API. Get various device settings including Ethernet link - * settings. The @cmd parameter is expected to have been cleared - * before get_settings is called. Returns a negative error code - * or zero. - * @set_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings - * API. Set various device settings including Ethernet link - * settings. Returns a negative error code or zero. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not * implemented, the @driver and @bus_info fields will be filled in @@ -297,19 +289,16 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * a TX queue has this number, return -EINVAL. If only a RX queue or a TX * queue has this number, ignore the inapplicable fields. * Returns a negative error code or zero. - * @get_link_ksettings: When defined, takes precedence over the - * %get_settings method. Get various device settings - * including Ethernet link settings. The %cmd and - * %link_mode_masks_nwords fields should be ignored (use - * %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), any - * change to them will be overwritten by kernel. Returns a - * negative error code or zero. - * @set_link_ksettings: When defined, takes precedence over the - * %set_settings method. Set various device settings including - * Ethernet link settings. The %cmd and %link_mode_masks_nwords - * fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS - * instead of the latter), any change to them will be overwritten - * by kernel. Returns a negative error code or zero. + * @get_link_ksettings: Get various device settings including Ethernet link + * settings. The %cmd and %link_mode_masks_nwords fields should be + * ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), + * any change to them will be overwritten by kernel. Returns a negative + * error code or zero. + * @set_link_ksettings: Set various device settings including Ethernet link + * settings. The %cmd and %link_mode_masks_nwords fields should be + * ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), + * any change to them will be overwritten by kernel. Returns a negative + * error code or zero. * @get_fecparam: Get the network device Forward Error Correction parameters. * @set_fecparam: Set the network device Forward Error Correction parameters. * @get_ethtool_phy_stats: Return extended statistics about the PHY device. @@ -329,8 +318,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * of the generic netdev features interface. */ struct ethtool_ops { - int (*get_settings)(struct net_device *, struct ethtool_cmd *); - int (*set_settings)(struct net_device *, struct ethtool_cmd *); void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index dc69391d2bba..c8f8e2455bf3 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -91,10 +91,6 @@ * %ETHTOOL_GSET to get the current values before making specific * changes and then applying them with %ETHTOOL_SSET. * - * Drivers that implement set_settings() should validate all fields - * other than @cmd that are not described as read-only or deprecated, - * and must ignore all fields described as read-only. - * * Deprecated fields should be ignored by both users and drivers. */ struct ethtool_cmd { @@ -1800,14 +1796,9 @@ enum ethtool_reset_flags { * rejected. * * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt - * are not available in %ethtool_link_settings. Until all drivers are - * converted to ignore them or to the new %ethtool_link_settings API, - * for both queries and changes, users should always try - * %ETHTOOL_GLINKSETTINGS first, and if it fails with -ENOTSUPP stick - * only to %ETHTOOL_GSET and %ETHTOOL_SSET consistently. If it - * succeeds, then users should stick to %ETHTOOL_GLINKSETTINGS and - * %ETHTOOL_SLINKSETTINGS (which would support drivers implementing - * either %ethtool_cmd or %ethtool_link_settings). + * are not available in %ethtool_link_settings. These fields will be + * always set to zero in %ETHTOOL_GSET reply and %ETHTOOL_SSET will + * fail if any of them is set to non-zero value. * * Users should assume that all fields not marked read-only are * writable and subject to validation by the driver. They should use -- cgit v1.2.3 From a4e0109a19c554e44c832958e426303781e1ad30 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 20 Aug 2018 08:12:32 -0700 Subject: virtchnl: use u8 type for a field in the virtchnl_filter struct The virtchnl_filter struct has a field called field_flags. A previous commit mistakenly had the type to be a __u8. What we want is for the field to be an unsigned 8 bit value, so let's just use the existing kernel type u8 for that. Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 212b3822d180..b41f7bc958ef 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -573,7 +573,7 @@ struct virtchnl_filter { enum virtchnl_flow_type flow_type; enum virtchnl_action action; u32 action_meta; - __u8 field_flags; + u8 field_flags; }; VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); -- cgit v1.2.3 From 93ee30f3e8b412c5fc2d2f7d9d002529d9a209ad Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 31 Aug 2018 13:40:02 +0200 Subject: xsk: i40e: get rid of useless struct xdp_umem_props This commit gets rid of the structure xdp_umem_props. It was there to be able to break a dependency at one point, but this is no longer needed. The values in the struct are instead stored directly in the xdp_umem structure. This simplifies the xsk code as well as af_xdp zero-copy drivers and as a bonus gets rid of one internal header file. The i40e driver is also adapted to the new interface in this commit. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 56994ad1ab40..932ca0dad6f3 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -16,11 +16,6 @@ struct net_device; struct xsk_queue; -struct xdp_umem_props { - u64 chunk_mask; - u64 size; -}; - struct xdp_umem_page { void *addr; dma_addr_t dma; @@ -30,7 +25,8 @@ struct xdp_umem { struct xsk_queue *fq; struct xsk_queue *cq; struct xdp_umem_page *pages; - struct xdp_umem_props props; + u64 chunk_mask; + u64 size; u32 headroom; u32 chunk_size_nohr; struct user_struct *user; -- cgit v1.2.3 From f061b48c1787e6fece2190e27da6878f4f1796d0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 29 Aug 2018 10:15:35 -0700 Subject: Revert "net: sched: act: add extack for lookup callback" This reverts commit 331a9295de23 ("net: sched: act: add extack for lookup callback"). This extack is never used after 6 months... In fact, it can be just set in the caller, right after ->lookup(). Cc: Alexander Aring Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 970303448c90..c6f195b3c706 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -85,8 +85,7 @@ struct tc_action_ops { struct tcf_result *); /* called under RCU BH lock*/ int (*dump)(struct sk_buff *, struct tc_action *, int, int); void (*cleanup)(struct tc_action *); - int (*lookup)(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack); + int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, int bind, bool rtnl_held, -- cgit v1.2.3 From 94524d8fc965a7a0facdef6d1b01d5ef6d71a802 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Wed, 29 Aug 2018 15:26:55 +0530 Subject: net/tls: Add support for async decryption of tls records When tls records are decrypted using asynchronous acclerators such as NXP CAAM engine, the crypto apis return -EINPROGRESS. Presently, on getting -EINPROGRESS, the tls record processing stops till the time the crypto accelerator finishes off and returns the result. This incurs a context switch and is not an efficient way of accessing the crypto accelerators. Crypto accelerators work efficient when they are queued with multiple crypto jobs without having to wait for the previous ones to complete. The patch submits multiple crypto requests without having to wait for for previous ones to complete. This has been implemented for records which are decrypted in zero-copy mode. At the end of recvmsg(), we wait for all the asynchronous decryption requests to complete. The references to records which have been sent for async decryption are dropped. For cases where record decryption is not possible in zero-copy mode, asynchronous decryption is not used and we wait for decryption crypto api to complete. For crypto requests executing in async fashion, the memory for aead_request, sglists and skb etc is freed from the decryption completion handler. The decryption completion handler wakesup the sleeping user context when recvmsg() flags that it has done sending all the decryption requests and there are no more decryption requests pending to be completed. Signed-off-by: Vakul Garg Reviewed-by: Dave Watson Signed-off-by: David S. Miller --- include/net/tls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index d5c683e8bb22..cd0a65bd92f9 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -124,6 +124,12 @@ struct tls_sw_context_rx { struct sk_buff *recv_pkt; u8 control; bool decrypted; + atomic_t decrypt_pending; + bool async_notify; +}; + +struct decrypt_req_ctx { + struct sock *sk; }; struct tls_record_info { -- cgit v1.2.3 From b9de3963cc2b373a655636335cb8c4ed12fc9d3b Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Thu, 30 Aug 2018 16:39:23 +0200 Subject: net/sched: fix type of htb statistics tokens and ctokens are defined as s64 in htb_class structure, and clamped to 32bits value during netlink dumps: cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), INT_MIN, INT_MAX); Defining it as u32 is working since userspace (tc) is printing it as signed int, but a correct definition from the beginning is probably better. In the same time, 'giants' structure member is unused since years, so update the comment to mark it unused. Signed-off-by: Florent Fourcot Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 8975fd1a1421..e9b7244ac381 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -395,9 +395,9 @@ enum { struct tc_htb_xstats { __u32 lends; __u32 borrows; - __u32 giants; /* too big packets (rate will not be accurate) */ - __u32 tokens; - __u32 ctokens; + __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */ + __s32 tokens; + __s32 ctokens; }; /* HFSC section */ -- cgit v1.2.3 From aa7e80b220f3a543eefbe4b7e2c5d2b73e2e2ef7 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 3 Sep 2018 20:19:28 +0300 Subject: net/mlx5: Fix atomic_mode enum values The field atomic_mode is 4 bits wide and therefore can hold values from 0x0 to 0xf. Remove the unnecessary 20 bit shift that made the values be incorrect. While that, remove unused enum values. Fixes: 57cda166bbe0 ("net/mlx5: Add DCT command interface") Signed-off-by: Moni Shoua Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7a452716de4b..d885e9f0e054 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -163,10 +163,7 @@ enum mlx5_dcbx_oper_mode { }; enum mlx5_dct_atomic_mode { - MLX5_ATOMIC_MODE_DCT_OFF = 20, - MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF, - MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF, - MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF, + MLX5_ATOMIC_MODE_DCT_CX = 2, }; enum { -- cgit v1.2.3 From 8ce78257965e6cd49720e653867e766ecd38883f Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:41 +0300 Subject: net/mlx5: Add proper NIC TX steering flow tables support Extend the ability to add steering rules to NIC TX flow tables. For now, we are only adding TX bypass (egress) which is used by the RDMA side. This will allow to shape outgoing traffic and tweak it if needed, for example performing encapsulation or rewriting headers. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 11fa4e66afc5..f2281e69ab39 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1120,6 +1120,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) +#define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit.cap) + +#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap) + #define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) -- cgit v1.2.3 From 90c1d1b8da67330b09893d749401a45328b51704 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:42 +0300 Subject: net/mlx5: Export modify header alloc/dealloc functions Those functions will be used by the RDMA side to create modify header actions to be attached to flow steering rules via verbs. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 804516e4f483..0cbf4d5cb1ab 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -196,4 +196,10 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id); +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + u32 modify_header_id); + #endif -- cgit v1.2.3 From 61444b458b01c95e55003d6f0b4d4c936fde51cb Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:44 +0300 Subject: net/mlx5: Break encap/decap into two separated flow table creation flags Today we are able to attach encap and decap actions only to the FDB. In preparation to enable those actions on the NIC flow tables, break the single flag into two. Those flags control whatever a decap or encap operations can be attached to the flow table created. For FDB, if encapsulation is required, we set both of them. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0cbf4d5cb1ab..0194e62ad66a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -45,7 +45,8 @@ enum { }; enum { - MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), }; #define LEFTOVERS_RULE_NUM 2 -- cgit v1.2.3 From e0e7a3861b6c6b673dc93e291ef11cf5e746b0c2 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:45 +0300 Subject: net/mlx5: Move header encap type to IFC header file Those bits are hardware specification and should be defined in the IFC header file. Signed-off-by: Mark Bloch Reviewed-by: Or Gerlitz Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f043d65b9bac..bd725e0924e5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4848,6 +4848,11 @@ struct mlx5_ifc_alloc_encap_header_out_bits { u8 reserved_at_60[0x20]; }; +enum { + MLX5_HEADER_TYPE_VXLAN = 0x0, + MLX5_HEADER_TYPE_NVGRE = 0x1, +}; + struct mlx5_ifc_alloc_encap_header_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; -- cgit v1.2.3 From 60786f0987c0d9354e5330ee11615b16cdb448fe Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:46 +0300 Subject: {net, RDMA}/mlx5: Rename encap to reformat packet Renames all encap mlx5_{core,ib} code to use the new naming of packet reformat. This change doesn't introduce any function change and is needed to properly reflect the operation being done by this action. For example not only can we encapsulate a packet, but also decapsulate it. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 4 ++-- include/linux/mlx5/mlx5_ifc.h | 50 +++++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0194e62ad66a..37d0c08d0966 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -45,7 +45,7 @@ enum { }; enum { - MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0), MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), }; @@ -160,7 +160,7 @@ struct mlx5_flow_act { u32 action; bool has_flow_tag; u32 flow_tag; - u32 encap_id; + u32 reformat_id; u32 modify_id; uintptr_t esp_id; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bd725e0924e5..c79eaae28e59 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -243,8 +243,8 @@ enum { MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, - MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, @@ -336,7 +336,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; u8 flow_table_modify[0x1]; - u8 encap[0x1]; + u8 reformat[0x1]; u8 decap[0x1]; u8 reserved_at_9[0x1]; u8 pop_vlan[0x1]; @@ -599,7 +599,7 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vxlan_encap_decap[0x1]; u8 nvgre_encap_decap[0x1]; u8 reserved_at_22[0x9]; - u8 log_max_encap_headers[0x5]; + u8 log_max_packet_reformat_context[0x5]; u8 reserved_2b[0x6]; u8 max_encap_header_size[0xa]; @@ -2394,7 +2394,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, - MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT = 0x10, MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40, MLX5_FLOW_CONTEXT_ACTION_VLAN_POP = 0x80, @@ -2427,7 +2427,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_a0[0x8]; u8 flow_counter_list_size[0x18]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 modify_header_id[0x20]; @@ -4802,19 +4802,19 @@ struct mlx5_ifc_query_eq_in_bits { u8 reserved_at_60[0x20]; }; -struct mlx5_ifc_encap_header_in_bits { +struct mlx5_ifc_packet_reformat_context_in_bits { u8 reserved_at_0[0x5]; - u8 header_type[0x3]; + u8 reformat_type[0x3]; u8 reserved_at_8[0xe]; - u8 encap_header_size[0xa]; + u8 reformat_data_size[0xa]; u8 reserved_at_20[0x10]; - u8 encap_header[2][0x8]; + u8 reformat_data[2][0x8]; - u8 more_encap_header[0][0x8]; + u8 more_reformat_data[0][0x8]; }; -struct mlx5_ifc_query_encap_header_out_bits { +struct mlx5_ifc_query_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4822,38 +4822,38 @@ struct mlx5_ifc_query_encap_header_out_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_encap_header_in_bits encap_header[0]; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0]; }; -struct mlx5_ifc_query_encap_header_in_bits { +struct mlx5_ifc_query_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_at_60[0xa0]; }; -struct mlx5_ifc_alloc_encap_header_out_bits { +struct mlx5_ifc_alloc_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_at_60[0x20]; }; enum { - MLX5_HEADER_TYPE_VXLAN = 0x0, - MLX5_HEADER_TYPE_NVGRE = 0x1, + MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0, + MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1, }; -struct mlx5_ifc_alloc_encap_header_in_bits { +struct mlx5_ifc_alloc_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -4862,10 +4862,10 @@ struct mlx5_ifc_alloc_encap_header_in_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_encap_header_in_bits encap_header; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context; }; -struct mlx5_ifc_dealloc_encap_header_out_bits { +struct mlx5_ifc_dealloc_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4874,14 +4874,14 @@ struct mlx5_ifc_dealloc_encap_header_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_dealloc_encap_header_in_bits { +struct mlx5_ifc_dealloc_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_20[0x10]; u8 op_mod[0x10]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_60[0x20]; }; @@ -6983,7 +6983,7 @@ struct mlx5_ifc_create_flow_table_out_bits { }; struct mlx5_ifc_flow_table_context_bits { - u8 encap_en[0x1]; + u8 reformat_en[0x1]; u8 decap_en[0x1]; u8 reserved_at_2[0x2]; u8 table_miss_action[0x4]; -- cgit v1.2.3 From bea4e1f6c6c5744d467ebf8b0699f5e391835130 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:47 +0300 Subject: net/mlx5: Expose new packet reformat capabilities Expose new abilities when creating a packet reformat context. The new types which can be created are: MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: Ability to create generic encap operation to be done by the HW. MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: Ability to create generic decap operation where the inner packet doesn't contain L2. MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: Ability to create generic encap operation to be done by the HW. The L2 of the original packet is dropped. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c79eaae28e59..3a4a2e0567e9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -344,8 +344,12 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_c[0x1]; u8 pop_vlan_2[0x1]; u8 push_vlan_2[0x1]; - u8 reserved_at_f[0x11]; - + u8 reformat_and_vlan_action[0x1]; + u8 reserved_at_10[0x2]; + u8 reformat_l3_tunnel_to_l2[0x1]; + u8 reformat_l2_to_l3_tunnel[0x1]; + u8 reformat_and_modify_action[0x1]; + u8 reserved_at_14[0xb]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -554,7 +558,13 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; - u8 reserved_at_3[0x1fd]; + u8 reserved_at_3[0x1d]; + u8 encap_general_header[0x1]; + u8 reserved_at_21[0xa]; + u8 log_max_packet_reformat_context[0x5]; + u8 reserved_at_30[0x6]; + u8 max_encap_header_size[0xa]; + u8 reserved_at_40[0x1c0]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; @@ -4851,6 +4861,9 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits { enum { MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0, MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1, + MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3, + MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, }; struct mlx5_ifc_alloc_packet_reformat_context_in_bits { -- cgit v1.2.3 From 50acec06f3928fc29647aecf1270e54cae583afb Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:49 +0300 Subject: net/mlx5: Export packet reformat alloc/dealloc functions This will allow for the RDMA side to allocate packet reformat context. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 37d0c08d0966..b1c026f1c8ba 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -203,4 +203,13 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id); +int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + u32 *packet_reformat_id); +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + u32 packet_reformat_id); + #endif -- cgit v1.2.3 From adf8ed01e4fdd254efead978d633718ab01a7d5c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Aug 2018 11:31:08 +0300 Subject: mac80211: add an optional TXQ for other PS-buffered frames Some drivers may want to also use the TXQ abstraction with non-data packets that need powersave buffering, so add a hardware flag to allow this. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e248f5fe5b19..03e1dfd311f7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -101,8 +101,9 @@ * Drivers indicate that they use this model by implementing the .wake_tx_queue * driver operation. * - * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with a - * single per-vif queue for multicast data frames. + * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with + * another per-sta for non-data/non-mgmt and bufferable management frames, and + * a single per-vif queue for multicast data frames. * * The driver is expected to initialize its private per-queue data for stations * and interfaces in the .add_interface and .sta_add ops. @@ -1843,7 +1844,8 @@ struct ieee80211_sta_rates { * unlimited. * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. - * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) + * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that + * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames */ struct ieee80211_sta { u32 supp_rates[NUM_NL80211_BANDS]; @@ -1884,7 +1886,7 @@ struct ieee80211_sta { bool support_p2p_ps; u16 max_rc_amsdu_len; - struct ieee80211_txq *txq[IEEE80211_NUM_TIDS]; + struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); @@ -1918,7 +1920,8 @@ struct ieee80211_tx_control { * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @sta: station table entry, %NULL for per-vif queue - * @tid: the TID for this queue (unused for per-vif queue) + * @tid: the TID for this queue (unused for per-vif queue), + * %IEEE80211_NUM_TIDS for non-data (if enabled) * @ac: the AC for this queue * @drv_priv: driver private area, sized by hw->txq_data_size * @@ -2131,6 +2134,9 @@ struct ieee80211_txq { * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't * support QoS NDP for AP probing - that's most likely a driver bug. * + * @IEEE80211_HW_BUFF_MMPDU_TXQ: use the TXQ for bufferable MMPDUs, this of + * course requires the driver to use TXQs to start with. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2176,6 +2182,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, + IEEE80211_HW_BUFF_MMPDU_TXQ, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS -- cgit v1.2.3 From 03512ceb60ae4be71ed3129dabb8625224c8ec40 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Fri, 31 Aug 2018 11:31:09 +0300 Subject: ieee80211: remove redundant leading zeroes The defines of IEEE80211_HE_OPERATION_VHT_OPER_INFO and IEEE80211_HE_OPERATION_MULTI_BSSID_AP have leading zeroes that makes the number look like it is bigger than 32 bit. This is misleading, remove it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 9c03a7d5e400..17ea51d088ae 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1963,8 +1963,8 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) #define IEEE80211_HE_OPERATION_TWT_REQUIRED 0x00000200 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK 0x000ffc00 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET 10 -#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x000100000 -#define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x000200000 +#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x00100000 +#define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x00200000 #define IEEE80211_HE_OPERATION_MULTI_BSSID_AP 0x10000000 #define IEEE80211_HE_OPERATION_TX_BSSID_INDICATOR 0x20000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x40000000 -- cgit v1.2.3 From 244eb9ae797385c2ed244f6bdf0534fcaa6f0d33 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Fri, 31 Aug 2018 11:31:14 +0300 Subject: cfg80211: add he_capabilities (ext) IE to AP settings Same as for HT and VHT. This helps the lower level to know whether the AP supports HE. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 33c2a1d2a8d2..9f3ed79c39d7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -849,6 +849,7 @@ struct cfg80211_bitrate_mask { * @beacon_rate: bitrate to be used for beacons * @ht_cap: HT capabilities (or %NULL if HT isn't enabled) * @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled) + * @he_cap: HE capabilities (or %NULL if HE isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT */ @@ -874,6 +875,7 @@ struct cfg80211_ap_settings { const struct ieee80211_ht_cap *ht_cap; const struct ieee80211_vht_cap *vht_cap; + const struct ieee80211_he_cap_elem *he_cap; bool ht_required, vht_required; }; -- cgit v1.2.3 From b0aa75f0b1b2e6bc77128fab36c8ed87e84917cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Aug 2018 11:31:16 +0300 Subject: ieee80211: add new VHT capability fields/parsing IEEE 802.11-2016 extended the VHT capability fields to allow indicating the number of spatial streams depending on the actually used bandwidth, add support for decoding this. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 17ea51d088ae..280600a10111 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1460,13 +1460,16 @@ struct ieee80211_ht_operation { * STA can receive. Rate expressed in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest RX data rate supported. - * The top 3 bits of this field are reserved. + * The top 3 bits of this field indicate the Maximum NSTS,total + * (a beamformee capability.) * @tx_mcs_map: TX MCS map 2 bits for each stream, total 8 streams * @tx_highest: Indicates highest long GI VHT PPDU data rate * STA can transmit. Rate expressed in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest TX data rate supported. - * The top 3 bits of this field are reserved. + * The top 2 bits of this field are reserved, the + * 3rd bit from the top indiciates VHT Extended NSS BW + * Capability. */ struct ieee80211_vht_mcs_info { __le16 rx_mcs_map; @@ -1475,6 +1478,13 @@ struct ieee80211_vht_mcs_info { __le16 tx_highest; } __packed; +/* for rx_highest */ +#define IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT 13 +#define IEEE80211_VHT_MAX_NSTS_TOTAL_MASK (7 << IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT) + +/* for tx_highest */ +#define IEEE80211_VHT_EXT_NSS_BW_CAPABLE (1 << 13) + /** * enum ieee80211_vht_mcs_support - VHT MCS support definitions * @IEEE80211_VHT_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the @@ -1650,6 +1660,7 @@ struct ieee80211_mu_edca_param_set { #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ 0x00000004 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ 0x00000008 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK 0x0000000C +#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_SHIFT 2 #define IEEE80211_VHT_CAP_RXLDPC 0x00000010 #define IEEE80211_VHT_CAP_SHORT_GI_80 0x00000020 #define IEEE80211_VHT_CAP_SHORT_GI_160 0x00000040 @@ -1678,6 +1689,26 @@ struct ieee80211_mu_edca_param_set { #define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB 0x0c000000 #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN 0x10000000 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN 0x20000000 +#define IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT 30 +#define IEEE80211_VHT_CAP_EXT_NSS_BW_MASK 0xc0000000 + +/** + * ieee80211_get_vht_max_nss - return max NSS for a given bandwidth/MCS + * @cap: VHT capabilities of the peer + * @bw: bandwidth to use + * @mcs: MCS index to use + * @ext_nss_bw_capable: indicates whether or not the local transmitter + * (rate scaling algorithm) can deal with the new logic + * (dot11VHTExtendedNSSBWCapable) + * + * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can + * vary for a given BW/MCS. This function parses the data. + * + * Note: This function is exported by cfg80211. + */ +int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, + enum ieee80211_vht_chanwidth bw, + int mcs, bool ext_nss_bw_capable); /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 -- cgit v1.2.3 From 09b4a4faf9d037990ac4f8110dd944b27b42d5df Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Aug 2018 11:31:17 +0300 Subject: mac80211: introduce capability flags for VHT EXT NSS support Depending on whether or not rate control supports selecting rates depending on the bandwidth, we can use VHT extended NSS support. In essence, this is dot11VHTExtendedNSSBWCapable from the spec, since depending on that we'll need to parse the bandwidth. If needed, also set/clear the VHT Capability Element bit for this capability so that we don't advertise it erroneously or don't advertise it when we actually use it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 03e1dfd311f7..00e2e9909d45 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2137,6 +2137,12 @@ struct ieee80211_txq { * @IEEE80211_HW_BUFF_MMPDU_TXQ: use the TXQ for bufferable MMPDUs, this of * course requires the driver to use TXQs to start with. * + * @IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW: (Hardware) rate control supports VHT + * extended NSS BW (dot11VHTExtendedNSSBWCapable). This flag will be set if + * the selected rate control algorithm sets %RATE_CTRL_CAPA_VHT_EXT_NSS_BW + * but if the rate control is built-in then it must be set by the driver. + * See also the documentation for that flag. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2183,6 +2189,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, IEEE80211_HW_BUFF_MMPDU_TXQ, + IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -5655,7 +5662,22 @@ struct ieee80211_tx_rate_control { bool bss; }; +/** + * enum rate_control_capabilities - rate control capabilities + */ +enum rate_control_capabilities { + /** + * @RATE_CTRL_CAPA_VHT_EXT_NSS_BW: + * Support for extended NSS BW support (dot11VHTExtendedNSSCapable) + * Note that this is only looked at if the minimum number of chains + * that the AP uses is < the number of TX chains the hardware has, + * otherwise the NSS difference doesn't bother us. + */ + RATE_CTRL_CAPA_VHT_EXT_NSS_BW = BIT(0), +}; + struct rate_control_ops { + unsigned long capa; const char *name; void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir); void (*free)(void *priv); -- cgit v1.2.3 From 70e53669c4c41b0fc043cb0bcb518b53428edf64 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Wed, 8 Aug 2018 18:40:01 +0800 Subject: mac80211: Store sk_pacing_shift in ieee80211_hw Make it possibly for drivers to adjust the default skb_pacing_shift by storing it in the hardware struct. Signed-off-by: Wen Gong [adjust commit log, move & adjust comment] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 00e2e9909d45..f8247d2658ac 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2308,6 +2308,10 @@ enum ieee80211_hw_flags { * supported by HW. * @max_nan_de_entries: maximum number of NAN DE functions supported by the * device. + * + * @tx_sk_pacing_shift: Pacing shift to set on TCP sockets when frames from + * them are encountered. The default should typically not be changed, + * unless the driver has good reasons for needing more buffers. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2343,6 +2347,7 @@ struct ieee80211_hw { u8 n_cipher_schemes; const struct ieee80211_cipher_scheme *cipher_schemes; u8 max_nan_de_entries; + u8 tx_sk_pacing_shift; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, -- cgit v1.2.3 From d1332e7be25088383527e3de325930bea64780cb Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Fri, 31 Aug 2018 11:31:20 +0300 Subject: mac80211: support radiotap L-SIG data As before with HE, the data needs to be provided by the driver in the skb head, since there's not enough space in the skb CB. Signed-off-by: Johannes Berg Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 15 +++++++++++++++ include/net/mac80211.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index feef706e1158..80d543902b8b 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -75,6 +75,7 @@ enum ieee80211_radiotap_presence { IEEE80211_RADIOTAP_TIMESTAMP = 22, IEEE80211_RADIOTAP_HE = 23, IEEE80211_RADIOTAP_HE_MU = 24, + IEEE80211_RADIOTAP_LSIG = 27, /* valid in every it_present bitmap, even vendor namespaces */ IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29, @@ -325,6 +326,20 @@ enum ieee80211_radiotap_he_mu_bits { IEEE80211_RADIOTAP_HE_MU_FLAGS2_CH2_CTR_26T_RU = 0x0800, }; +enum ieee80211_radiotap_lsig_data1 { + IEEE80211_RADIOTAP_LSIG_DATA1_RATE_KNOWN = 0x0001, + IEEE80211_RADIOTAP_LSIG_DATA1_LENGTH_KNOWN = 0x0002, +}; + +enum ieee80211_radiotap_lsig_data2 { + IEEE80211_RADIOTAP_LSIG_DATA2_RATE = 0x000f, + IEEE80211_RADIOTAP_LSIG_DATA2_LENGTH = 0xfff0, +}; + +struct ieee80211_radiotap_lsig { + __le16 data1, data2; +}; + /** * ieee80211_get_radiotap_len - get radiotap header length */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f8247d2658ac..3cc1ca17a1a8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1141,6 +1141,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * from the RX info data, so leave those zeroed when building this data) * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present * (&struct ieee80211_radiotap_he_mu) + * @RX_FLAG_RADIOTAP_LSIG: L-SIG radiotap data is present */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1171,6 +1172,7 @@ enum mac80211_rx_flags { RX_FLAG_AMPDU_EOF_BIT_KNOWN = BIT(25), RX_FLAG_RADIOTAP_HE = BIT(26), RX_FLAG_RADIOTAP_HE_MU = BIT(27), + RX_FLAG_RADIOTAP_LSIG = BIT(28), }; /** -- cgit v1.2.3 From 2b815b04dfe45d1278fd4137675fe1398f656b0a Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Fri, 31 Aug 2018 15:00:37 +0200 Subject: nl80211: Add CAN_REPLACE_PTK0 API Drivers able to correctly replace a in-use key should set @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 to allow the user space (e.g. hostapd or wpa_supplicant) to rekey PTK keys. The user space must detect a PTK rekey attempt and only go ahead with it when the driver has set this flag. If the driver is not supporting the feature the user space either must not replace the PTK key or perform a full re-association instead. Ignoring this flag and continuing to rekey the connection can still work but has to be considered insecure and broken. Depending on the driver it can leak clear text packets or freeze the connection and is only supported to allow the user space to be updated. Signed-off-by: Alexander Wetzel Reviewed-by: Denis Kenzior Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1766a12b231c..cfc94178d608 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5226,6 +5226,11 @@ enum nl80211_feature_flags { * except for supported rates from the probe request content if requested * by the %NL80211_SCAN_FLAG_MIN_PREQ_CONTENT flag. * + * @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0: Driver/device confirm that they are + * able to rekey an in-use key correctly. Userspace must not rekey PTK keys + * if this flag is not set. Ignoring this can leak clear text packets and/or + * freeze the connection. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5263,6 +5268,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_TXQS, NL80211_EXT_FEATURE_SCAN_RANDOM_SN, NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT, + NL80211_EXT_FEATURE_CAN_REPLACE_PTK0, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 62872a9b9a106f00360193f428451c321ec2e823 Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Fri, 31 Aug 2018 15:00:38 +0200 Subject: mac80211: Fix PTK rekey freezes and clear text leak Rekeying PTK keys without "Extended Key ID for Individually Addressed Frames" did use a procedure not suitable to replace in-use keys and could caused the following issues: 1) Freeze caused by incoming frames: If the local STA installed the key prior to the remote STA we still had the old key active in the hardware when mac80211 switched over to the new key. Therefore there was a window where the card could hand over frames decoded with the old key to mac80211 and bump the new PN (IV) value to an incorrect high number. When it happened the local replay detection silently started to drop all frames sent with the new key. 2) Freeze caused by outgoing frames: If mac80211 was providing the PN (IV) and handed over a clear text frame for encryption to the hardware prior to a key change the driver/card could have processed the queued frame after switching to the new key. This bumped the PN value on the remote STA to an incorrect high number, tricking the remote STA to discard all frames we sent later. 3) Freeze caused by RX aggregation reorder buffer: An aggregation session started with the old key and ending after the switch to the new key also bumped the PN to an incorrect high number, freezing the connection quite similar to 1). 4) Freeze caused by repeating lost frames in an aggregation session: A driver could repeat a lost frame and encrypt it with the new key while in a TX aggregation session without updating the PN for the new key. This also could freeze connections similar to 2). 5) Clear text leak: Removing encryption offload from the card cleared the encryption offload flag only after the card had deleted the key and we did not stop TX during the rekey. The driver/card could therefore get unencrypted frames from mac80211 while no longer be instructed to encrypt them. To prevent those issues the key install logic has been changed: - Mac80211 divers known to be able to rekey PTK0 keys have to set @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0, - mac80211 stops queuing frames depending on the key during the replace - the key is first replaced in the hardware and after that in mac80211 - and mac80211 stops/blocks new aggregation sessions during the rekey. For drivers not setting @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 the user space must avoid PTK rekeys if "Extended Key ID for Individually Addressed Frames" is not being used. Rekeys for mac80211 drivers without this flag will generate a warning and use an extra call to ieee80211_flush_queues() to both highlight and try to prevent the issues with not updated drivers. The core of the fix changes the key install procedure from: - atomic switch over to the new key in mac80211 - remove the old key in the hardware (stops encryption offloading, fall back to software encryption with a potential clear text packet leak in between) - delete the inactive old key in mac80211 - enable hardware encryption offloading for the new key to: - if it's a PTK mark the old key as tainted to drop TX frames with the outgoing key - replace the key in hardware with the new one - atomic switch over to the new (not marked as tainted) key in mac80211 (which also resumes TX) - delete the inactive old key in mac80211 With the new sequence the hardware will be unable to decrypt frames encrypted with the old key prior to switching to the new key in mac80211 and thus prevent PNs from packets decrypted with the old key to be accounted against the new key. For that to work the drivers have to provide a clear boundary. Mac80211 drivers setting @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 confirm to provide it and mac80211 will then be able to correctly rekey in-use PTK keys with those drivers. The mac80211 requirements for drivers to set the flag have been added to the "Hardware crypto acceleration" documentation section. It drills down to: The drivers must not hand over frames decrypted with the old key to mac80211 once the call to set_key() with %DISABLE_KEY has been completed. It's allowed to either drop or continue to use the old key for any outgoing frames which are already in the queues, but it must not send out any of them unencrypted or encrypted with the new key. Even with the new boundary in place aggregation sessions with the reorder buffer are problematic: RX aggregation session started prior and completed after the rekey could still dump frames received with the old key at mac80211 after it switched over to the new key. This is side stepped by stopping all (RX and TX) aggregation sessions when replacing a PTK key and hardware key offloading. Stopping TX aggregation sessions avoids the need to get the PNs (IVs) updated in frames prepared for the old key and (re)transmitted after the switch to the new key. As a bonus it improves the compatibility when the remote STA is not handling rekeys as it should. When using software crypto aggregation sessions are not stopped. Mac80211 won't be able to decode the dangerous frames and discard them without special handling. Signed-off-by: Alexander Wetzel [trim overly long rekey warning] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3cc1ca17a1a8..8c26d2d36cbe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2531,6 +2531,19 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * The set_default_unicast_key() call updates the default WEP key index * configured to the hardware for WEP encryption type. This is required * for devices that support offload of data packets (e.g. ARP responses). + * + * Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag + * when they are able to replace in-use PTK keys according to to following + * requirements: + * 1) They do not hand over frames decrypted with the old key to + mac80211 once the call to set_key() with command %DISABLE_KEY has been + completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key, + 2) either drop or continue to use the old key for any outgoing frames queued + at the time of the key deletion (including re-transmits), + 3) never send out a frame queued prior to the set_key() %SET_KEY command + encrypted with the new key and + 4) never send out a frame unencrypted when it should be encrypted. + Mac80211 will not queue any new frames for a deleted key to the driver. */ /** -- cgit v1.2.3 From c3d1f8752802b2e1fb12c73bee50035bc125bc54 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 5 Sep 2018 08:06:06 +0300 Subject: mac80211: support reporting 0-length PSDU in radiotap For certain sounding frames, it may be useful to report them to userspace even though they don't have a PSDU in order to determine the PHY parameters (e.g. VHT rate/stream config.) Add support for this to mac80211. Signed-off-by: Johannes Berg Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 6 ++++++ include/net/mac80211.h | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 80d543902b8b..8014153bdd49 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -75,6 +75,7 @@ enum ieee80211_radiotap_presence { IEEE80211_RADIOTAP_TIMESTAMP = 22, IEEE80211_RADIOTAP_HE = 23, IEEE80211_RADIOTAP_HE_MU = 24, + IEEE80211_RADIOTAP_ZERO_LEN_PSDU = 26, IEEE80211_RADIOTAP_LSIG = 27, /* valid in every it_present bitmap, even vendor namespaces */ @@ -340,6 +341,11 @@ struct ieee80211_radiotap_lsig { __le16 data1, data2; }; +enum ieee80211_radiotap_zero_len_psdu_type { + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING = 0, + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR = 0xff, +}; + /** * ieee80211_get_radiotap_len - get radiotap header length */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8c26d2d36cbe..50bf598abdfd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1142,6 +1142,10 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present * (&struct ieee80211_radiotap_he_mu) * @RX_FLAG_RADIOTAP_LSIG: L-SIG radiotap data is present + * @RX_FLAG_NO_PSDU: use the frame only for radiotap reporting, with + * the "0-length PSDU" field included there. The value for it is + * in &struct ieee80211_rx_status. Note that if this value isn't + * known the frame shouldn't be reported. */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1173,6 +1177,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_HE = BIT(26), RX_FLAG_RADIOTAP_HE_MU = BIT(27), RX_FLAG_RADIOTAP_LSIG = BIT(28), + RX_FLAG_NO_PSDU = BIT(29), }; /** @@ -1245,6 +1250,7 @@ enum mac80211_rx_encoding { * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU * @ampdu_delimiter_crc: A-MPDU delimiter CRC + * @zero_length_psdu_type: radiotap type of the 0-length PSDU */ struct ieee80211_rx_status { u64 mactime; @@ -1265,6 +1271,7 @@ struct ieee80211_rx_status { u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; u8 ampdu_delimiter_crc; + u8 zero_length_psdu_type; }; /** -- cgit v1.2.3 From add7453ad62f05c8f1a48675bb4dfed52e6ac878 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 5 Sep 2018 08:06:08 +0300 Subject: wireless: align to draft 11ax D3.0 Align to new 11ax draft D3.0. Change/add new MAC and PHY capabilities and update drivers' 11ax capabilities and mac80211's debugfs accordingly. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 72 ++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 280600a10111..c4809ad8ab46 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1555,11 +1555,11 @@ struct ieee80211_vht_operation { * struct ieee80211_he_cap_elem - HE capabilities element * * This structure is the "HE capabilities element" fixed fields as - * described in P802.11ax_D2.0 section 9.4.2.237.2 and 9.4.2.237.3 + * described in P802.11ax_D3.0 section 9.4.2.237.2 and 9.4.2.237.3 */ struct ieee80211_he_cap_elem { - u8 mac_cap_info[5]; - u8 phy_cap_info[9]; + u8 mac_cap_info[6]; + u8 phy_cap_info[11]; } __packed; #define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN 5 @@ -1738,15 +1738,15 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US 0x04 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US 0x08 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK 0x0c -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_1 0x00 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_2 0x10 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_3 0x20 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_4 0x30 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_5 0x40 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_6 0x50 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_7 0x60 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8 0x70 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_MASK 0x70 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_1 0x00 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_2 0x10 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_3 0x20 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_4 0x30 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_5 0x40 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_6 0x50 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_7 0x60 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8 0x70 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_MASK 0x70 /* Link adaptation is split between byte HE_MAC_CAP1 and * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE @@ -1760,14 +1760,13 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION 0x01 #define IEEE80211_HE_MAC_CAP2_ALL_ACK 0x02 -#define IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED 0x04 +#define IEEE80211_HE_MAC_CAP2_TRS 0x04 #define IEEE80211_HE_MAC_CAP2_BSR 0x08 #define IEEE80211_HE_MAC_CAP2_BCAST_TWT 0x10 #define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP 0x20 #define IEEE80211_HE_MAC_CAP2_MU_CASCADING 0x40 #define IEEE80211_HE_MAC_CAP2_ACK_EN 0x80 -#define IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU 0x01 #define IEEE80211_HE_MAC_CAP3_OMI_CONTROL 0x02 #define IEEE80211_HE_MAC_CAP3_OFDMA_RA 0x04 @@ -1775,25 +1774,34 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the * same field in the HE capabilities. */ -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT 0x00 -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1 0x08 -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2 0x10 -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED 0x18 -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK 0x18 -#define IEEE80211_HE_MAC_CAP3_A_AMSDU_FRAG 0x20 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT 0x00 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1 0x08 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2 0x10 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED 0x18 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK 0x18 +#define IEEE80211_HE_MAC_CAP3_AMSDU_FRAG 0x20 #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED 0x40 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS 0x80 #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG 0x01 #define IEEE80211_HE_MAC_CAP4_QTP 0x02 #define IEEE80211_HE_MAC_CAP4_BQR 0x04 -#define IEEE80211_HE_MAC_CAP4_SR_RESP 0x08 +#define IEEE80211_HE_MAC_CAP4_SRP_RESP 0x08 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP 0x10 #define IEEE80211_HE_MAC_CAP4_OPS 0x20 #define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU 0x40 +/* Multi TID agg TX is split between byte #4 and #5 + * The value is a combination of B39,B40,B41 + */ +#define IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39 0x80 + +#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40 0x01 +#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41 0x02 +#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECVITE_TRANSMISSION 0x04 +#define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU 0x08 +#define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX 0x10 /* 802.11ax HE PHY capabilities */ -#define IEEE80211_HE_PHY_CAP0_DUAL_BAND 0x01 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G 0x02 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G 0x04 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G 0x08 @@ -1810,10 +1818,10 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A 0x10 #define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD 0x20 #define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US 0x40 -/* Midamble RX Max NSTS is split between byte #2 and byte #3 */ -#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS 0x80 +/* Midamble RX/TX Max NSTS is split between byte #2 and byte #3 */ +#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS 0x80 -#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS 0x01 +#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS 0x01 #define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US 0x02 #define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ 0x04 #define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ 0x08 @@ -1914,7 +1922,19 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU 0x04 #define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU 0x08 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI 0x10 -#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_2X_AND_1XLTF 0x20 +#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_TX_2X_AND_1XLTF 0x20 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_20MHZ 0x00 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_40MHZ 0x40 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_80MHZ 0x80 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ 0xc0 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_MASK 0xc0 + +#define IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM 0x01 +#define IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK 0x02 +#define IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU 0x04 +#define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU 0x08 +#define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB 0x10 +#define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB 0x20 /* 802.11ax HE TX/RX MCS NSS Support */ #define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS (3) -- cgit v1.2.3 From 0eeb2b674f05ccb5162a1d68c0b8ae81e25fd972 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 5 Sep 2018 08:06:09 +0300 Subject: mac80211: add an option for station management TXQ We have a TXQ abstraction for non-data packets that need powersave buffering. Since the AP cannot sleep, in case of station we can use this TXQ for all management frames, regardless if they are bufferable. Add HW flag to allow that. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 50bf598abdfd..fe71cec8ba42 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2152,6 +2152,10 @@ struct ieee80211_txq { * but if the rate control is built-in then it must be set by the driver. * See also the documentation for that flag. * + * @IEEE80211_HW_STA_MMPDU_TXQ: use the extra non-TID per-station TXQ for all + * MMPDUs on station interfaces. This of course requires the driver to use + * TXQs to start with. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2199,6 +2203,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, IEEE80211_HW_BUFF_MMPDU_TXQ, IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, + IEEE80211_HW_STA_MMPDU_TXQ, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS -- cgit v1.2.3 From edba6bdad6fef787c0363e8a1e7d91e8d6a10129 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 5 Sep 2018 08:06:10 +0300 Subject: mac80211: allow AMSDU size limitation per-TID Some drivers may have AMSDU size limitation per TID, due to HW constrains. Add an option to set this limit. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fe71cec8ba42..28da9e27ea70 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1853,6 +1853,7 @@ struct ieee80211_sta_rates { * unlimited. * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control. + * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames */ @@ -1894,6 +1895,7 @@ struct ieee80211_sta { u16 max_amsdu_len; bool support_p2p_ps; u16 max_rc_amsdu_len; + u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS]; struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1]; -- cgit v1.2.3 From 9739fe29a207ffff55361a3047e7780ebddccdb2 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 5 Sep 2018 08:06:11 +0300 Subject: mac80211: add an option for drivers to check if packets can be aggregated Some hardwares have limitations on the packets' type in AMSDU. Add an optional driver callback to determine if two skbs can be used in the same AMSDU or not. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 28da9e27ea70..c4fadbafbf21 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3594,6 +3594,10 @@ enum ieee80211_reconfig_type { * @del_nan_func: Remove a NAN function. The driver must call * ieee80211_nan_func_terminated() with * NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal. + * @can_aggregate_in_amsdu: Called in order to determine if HW supports + * aggregating two specific frames in the same A-MSDU. The relation + * between the skbs should be symmetric and transitive. Note that while + * skb is always a real frame, head may or may not be an A-MSDU. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3876,6 +3880,9 @@ struct ieee80211_ops { void (*del_nan_func)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u8 instance_id); + bool (*can_aggregate_in_amsdu)(struct ieee80211_hw *hw, + struct sk_buff *head, + struct sk_buff *skb); }; /** -- cgit v1.2.3 From 83033688b7ade18d2dbbcefa810f02ff66ba549d Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 23 Jul 2018 10:55:39 +0300 Subject: net/mlx5: Change flow counters addlist type to single linked list In order to prevent flow counters stats work function from traversing whole flow counters tree while searching for deleted flow counters, new list to store deleted flow counters will be added to struct mlx5_fc_stats. However, the flow counter structure itself has no space left to store any more data in first cache line. To free space that is needed to store additional list node, convert current addlist double linked list (two pointers per node) to atomic single linked list (one pointer per node). Lockless NULL-terminated single linked list data type doesn't require any additional external synchronization for operations used by flow counters module (add single new element, remove all elements from list and traverse them). Remove addlist_lock that is no longer needed. Signed-off-by: Vlad Buslov Acked-by: Amir Vadai Reviewed-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7a452716de4b..c00549293982 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -584,9 +584,7 @@ struct mlx5_irq_info { struct mlx5_fc_stats { struct rb_root counters; - struct list_head addlist; - /* protect addlist add/splice operations */ - spinlock_t addlist_lock; + struct llist_head addlist; struct workqueue_struct *wq; struct delayed_work work; -- cgit v1.2.3 From 6e5e22839136fdb466af0aa46ff2404713dff974 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 23 Jul 2018 11:32:05 +0300 Subject: net/mlx5: Add new list to store deleted flow counters In order to prevent flow counters stats work function from traversing whole flow counters tree while searching for deleted flow counters, new list to store deleted flow counters is added to struct mlx5_fc_stats. Lockless NULL-terminated single linked list data type is used due to following reasons: - This use case only needs to add single element to list and remove/iterate whole list. Lockless list doesn't require any additional synchronization for these operations. - First cache line of flow counter data structure only has space to store single additional pointer, which precludes usage of double linked list. Remove flow counter 'deleted' flag that is no longer needed. Signed-off-by: Vlad Buslov Acked-by: Amir Vadai Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c00549293982..4b53ac64004b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -585,6 +585,7 @@ struct mlx5_irq_info { struct mlx5_fc_stats { struct rb_root counters; struct llist_head addlist; + struct llist_head dellist; struct workqueue_struct *wq; struct delayed_work work; -- cgit v1.2.3 From 9aff93d7d0d4b3f3076d7bd12a4ad06ef1cf9804 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 24 Jul 2018 09:52:11 +0300 Subject: net/mlx5: Store flow counters in a list In order to improve performance of flow counter stats query loop that traverses all configured flow counters, replace rb_tree with double-linked list. This change improves performance of traversing flow counters by removing the tree traversal. (profiling data showed that call to rb_next was most top CPU consumer) However, lookup of flow flow counter in list becomes linear, instead of logarithmic. This problem is fixed by next patch in series, which adds idr for fast lookup. Idr is to be used because it is not an intrusive data structure and doesn't require adding any new members to struct mlx5_fc, which allows its control data part to stay <= 1 cache line in size. Signed-off-by: Vlad Buslov Acked-by: Amir Vadai Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4b53ac64004b..61bed33e6675 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -583,7 +583,7 @@ struct mlx5_irq_info { }; struct mlx5_fc_stats { - struct rb_root counters; + struct list_head counters; struct llist_head addlist; struct llist_head dellist; -- cgit v1.2.3 From 12d6066c3b29c5606c4a2466f964fbd9ede803c5 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 24 Jul 2018 16:37:40 +0300 Subject: net/mlx5: Add flow counters idr Previous patch in series changed flow counter storage structure from rb_tree to linked list in order to improve flow counter traversal performance. The drawback of such solution is that flow counter lookup by id becomes linear in complexity. Store pointers to flow counters in idr in order to improve lookup performance to logarithmic again. Idr is non-intrusive data structure and doesn't require extending flow counter struct with new elements. This means that idr can be used for lookup, while linked list from previous patch is used for traversal, and struct mlx5_fc size is <= 2 cache lines. Signed-off-by: Vlad Buslov Acked-by: Amir Vadai Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 61bed33e6675..2a0c845f6bdb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -583,6 +583,8 @@ struct mlx5_irq_info { }; struct mlx5_fc_stats { + spinlock_t counters_idr_lock; /* protects counters_idr */ + struct idr counters_idr; struct list_head counters; struct llist_head addlist; struct llist_head dellist; -- cgit v1.2.3 From 64109f1dc41f25f4a9c6b114e04b6266bf4128ad Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Tue, 5 Jun 2018 09:22:18 +0300 Subject: net/mlx5e: Replace PTP clock lock from RW lock to seq lock Changed "priv.clock.lock" lock from 'rw_lock' to 'seq_lock' in order to improve packet rate performance. Tested on Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz. Sent 64b packets between two peers connected by ConnectX-5, and measured packet rate for the receiver in three modes: no time-stamping (base rate) time-stamping using rw_lock (old lock) for critical region time-stamping using seq_lock (new lock) for critical region Only the receiver time stamped its packets. The measured packet rate improvements are: Single flow (multiple TX rings to single RX ring): without timestamping: 4.26 (M packets)/sec with rw-lock (old lock): 4.1 (M packets)/sec with seq-lock (new lock): 4.16 (M packets)/sec 1.46% improvement Multiple flows (multiple TX rings to six RX rings): without timestamping: 22 (M packets)/sec with rw-lock (old lock): 11.7 (M packets)/sec with seq-lock (new lock): 21.3 (M packets)/sec 82.05% improvement The packet rate improvement is due to the lack of atomic operations for the 'readers' by the seq-lock. Since there are much more 'readers' than 'writers' contention on this lock, almost all atomic operations are saved. this results in a dramatic decrease in overall cache misses. Signed-off-by: Shay Agroskin Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2a0c845f6bdb..b7fce2c9443d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -805,7 +805,7 @@ struct mlx5_pps { }; struct mlx5_clock { - rwlock_t lock; + seqlock_t lock; struct cyclecounter cycles; struct timecounter tc; struct hwtstamp_config hwtstamp_config; -- cgit v1.2.3 From fa788d986a3aac5069378ed04697bd06f83d3488 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 3 Sep 2018 16:23:36 +0200 Subject: packet: add sockopt to ignore outgoing packets Currently, the only way to ignore outgoing packets on a packet socket is via the BPF filter. With MSG_ZEROCOPY, packets that are looped into AF_PACKET are copied in dev_queue_xmit_nit(), and this copy happens even if the filter run from packet_rcv() would reject them. So the presence of a packet socket on the interface takes away the benefits of MSG_ZEROCOPY, even if the packet socket is not interested in outgoing packets. (Even when MSG_ZEROCOPY is not used, the skb is unnecessarily cloned, but the cost for that is much lower.) Add a socket option to allow AF_PACKET sockets to ignore outgoing packets to solve this. Note that the *BSDs already have something similar: BIOCSSEESENT/BIOCSDIRECTION and BIOCSDIRFILT. The first intended user is lldpd. Signed-off-by: Vincent Whitchurch Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/uapi/linux/if_packet.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4271f6b4e419..e2b3bd750c98 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2343,6 +2343,7 @@ static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, struct packet_type { __be16 type; /* This is really htons(ether_type). */ + bool ignore_outgoing; struct net_device *dev; /* NULL is wildcarded here */ int (*func) (struct sk_buff *, struct net_device *, diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 67b61d91d89b..467b654bd4c7 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -57,6 +57,7 @@ struct sockaddr_ll { #define PACKET_QDISC_BYPASS 20 #define PACKET_ROLLOVER_STATS 21 #define PACKET_FANOUT_DATA 22 +#define PACKET_IGNORE_OUTGOING 23 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 -- cgit v1.2.3 From c383edc42403b0bca31cbaabafd44dd58afb202f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 4 Sep 2018 21:53:47 +0200 Subject: rtnetlink: add rtnl_get_net_ns_capable() get_target_net() will be used in follow-up patches in ipv{4,6} codepaths to retrieve network namespaces based on network namespace identifiers. So remove the static declaration and export in the rtnetlink header. Also, rename it to rtnl_get_net_ns_capable() to make it obvious what this function is doing. Export rtnl_get_net_ns_capable() so it can be used when ipv6 is built as a module. Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 0bbaa5488423..cf26e5aacac4 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -165,6 +165,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr); +struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) -- cgit v1.2.3 From 9f3c057c146fce335c160e95ca893d5bc34e7d00 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 4 Sep 2018 21:53:48 +0200 Subject: if_addr: add IFA_TARGET_NETNSID This adds a new IFA_TARGET_NETNSID property to be used by address families such as PF_INET and PF_INET6. The IFA_TARGET_NETNSID property can be used to send a network namespace identifier as part of a request. If a IFA_TARGET_NETNSID property is identified it will be used to retrieve the target network namespace in which the request is to be made. Signed-off-by: Christian Brauner Cc: Jiri Benc Cc: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_addr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index ebaf5701c9db..dfcf3ce0097f 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -34,6 +34,7 @@ enum { IFA_MULTICAST, IFA_FLAGS, IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_TARGET_NETNSID, __IFA_MAX, }; -- cgit v1.2.3 From 19d8f1ad12fd746e60707a58d954980013c7a35a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 4 Sep 2018 21:53:52 +0200 Subject: if_link: add IFLA_TARGET_NETNSID alias This adds IFLA_TARGET_NETNSID as an alias for IFLA_IF_NETNSID for RTM_*LINK requests. The new name is clearer and also aligns with the newly introduced IFA_TARGET_NETNSID propert for RTM_*ADDR requests. Signed-off-by: Christian Brauner Suggested-by: Nicolas Dichtel Cc: Jiri Benc Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 43391e2d1153..29d49b989acd 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -161,6 +161,7 @@ enum { IFLA_EVENT, IFLA_NEW_NETNSID, IFLA_IF_NETNSID, + IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */ IFLA_CARRIER_UP_COUNT, IFLA_CARRIER_DOWN_COUNT, IFLA_NEW_IFINDEX, -- cgit v1.2.3 From a3f723079df85eafc10c628dabdfcf374b8e1523 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Wed, 5 Sep 2018 18:35:55 +0300 Subject: qed*: Utilize FW 8.37.7.0 This patch adds a new qed firmware with fixes and support for new features. Fixes: - Fix a rare case of device crash with iWARP, iSCSI or FCoE offload. - Fix GRE tunneled traffic when iWARP offload is enabled. - Fix RoCE failure in ib_send_bw when using inline data. - Fix latency optimization flow for inline WQEs. - BigBear 100G fix RDMA: - Reduce task context size. - Application page sizes above 2GB support. - Performance improvements. ETH: - Tenant DCB support. - Replace RSS indirection table update interface. Misc: - Debug Tools changes. Signed-off-by: Denis Bolotin Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 10 +++++----- include/linux/qed/iscsi_common.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 0081fa6d1268..03f59a28fefd 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -110,7 +110,7 @@ #define FW_MAJOR_VERSION 8 #define FW_MINOR_VERSION 37 -#define FW_REVISION_VERSION 2 +#define FW_REVISION_VERSION 7 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -931,12 +931,12 @@ struct db_rdma_dpm_params { #define DB_RDMA_DPM_PARAMS_WQE_SIZE_SHIFT 16 #define DB_RDMA_DPM_PARAMS_RESERVED0_MASK 0x1 #define DB_RDMA_DPM_PARAMS_RESERVED0_SHIFT 27 -#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 -#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT 28 +#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_SHIFT 28 #define DB_RDMA_DPM_PARAMS_S_FLG_MASK 0x1 #define DB_RDMA_DPM_PARAMS_S_FLG_SHIFT 29 -#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK 0x1 -#define DB_RDMA_DPM_PARAMS_RESERVED1_SHIFT 30 +#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT 30 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK 0x1 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT 31 }; diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index b34c573f2b30..66aba505ec56 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -896,7 +896,7 @@ struct e4_ustorm_iscsi_task_ag_ctx { __le32 exp_cont_len; __le32 total_data_acked; __le32 exp_data_acked; - u8 next_tid_valid; + u8 byte2; u8 byte3; __le16 word1; __le16 next_tid; -- cgit v1.2.3 From 0153167aebd0808fb90031dba07d4e696557474c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 10 Sep 2018 09:11:28 -0700 Subject: net/ipv6: Remove rt6i_prefsrc After the conversion to fib6_info, rt6i_prefsrc has a single user that reads the value and otherwise it is only set. The one reader can be converted to use rt->from so rt6i_prefsrc can be removed, reducing rt6_info by another 20 bytes. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3d4930528db0..c7496663f99a 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -182,7 +182,6 @@ struct rt6_info { struct in6_addr rt6i_gateway; struct inet6_dev *rt6i_idev; u32 rt6i_flags; - struct rt6key rt6i_prefsrc; struct list_head rt6i_uncached; struct uncached_list *rt6i_uncached_list; -- cgit v1.2.3 From aea890b8b2e071bb75043353581f2197a2f13160 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 29 Jul 2018 16:22:13 -0700 Subject: sch_htb: Remove local SKB queue handling code. Instead, adjust __qdisc_enqueue_tail() such that HTB can use it instead. The only other caller of __qdisc_enqueue_tail() is qdisc_enqueue_tail() so we can move the backlog and return value handling (which HTB doesn't need/want) to the latter. Signed-off-by: David S. Miller --- include/net/sch_generic.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a6d00093f35e..bc8f6b0b6610 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -828,8 +828,8 @@ static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) qh->qlen = 0; } -static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, - struct qdisc_skb_head *qh) +static inline void __qdisc_enqueue_tail(struct sk_buff *skb, + struct qdisc_skb_head *qh) { struct sk_buff *last = qh->tail; @@ -842,14 +842,13 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, qh->head = skb; } qh->qlen++; - qdisc_qstats_backlog_inc(sch, skb); - - return NET_XMIT_SUCCESS; } static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) { - return __qdisc_enqueue_tail(skb, sch, &sch->q); + __qdisc_enqueue_tail(skb, &sch->q); + qdisc_qstats_backlog_inc(sch, skb); + return NET_XMIT_SUCCESS; } static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) -- cgit v1.2.3 From 596977300ab5c5d5d85f7950dd7f299f8322e533 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 29 Jul 2018 16:33:28 -0700 Subject: sch_netem: Move private queue handler to generic location. By hand copies of SKB list handlers do not belong in individual packet schedulers. Signed-off-by: David S. Miller --- include/net/sch_generic.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index bc8f6b0b6610..fdaa5506e6f7 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -851,6 +851,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } +static inline void __qdisc_enqueue_head(struct sk_buff *skb, + struct qdisc_skb_head *qh) +{ + skb->next = qh->head; + + if (!qh->head) + qh->tail = skb; + qh->head = skb; + qh->qlen++; +} + static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) { struct sk_buff *skb = qh->head; -- cgit v1.2.3 From 8b69bd7d8a8927d537f134c37bcca6cbfa58e1b2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Aug 2018 18:43:38 -0700 Subject: ppp: Remove direct skb_queue_head list pointer access. Add a helper, __skb_peek(), and use it in ppp_mp_reconstruct(). Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 17a13e4785fc..89283b77294d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1592,6 +1592,17 @@ static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) return skb; } +/** + * __skb_peek - peek at the head of a non-empty &sk_buff_head + * @list_: list to peek at + * + * Like skb_peek(), but the caller knows that the list is not empty. + */ +static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_) +{ + return list_->next; +} + /** * skb_peek_next - peek skb following the given one from a queue * @skb: skb to start from -- cgit v1.2.3 From a8305bff685252e80b7c60f4f5e7dd2e63e38218 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 29 Jul 2018 20:42:53 -0700 Subject: net: Add and use skb_mark_not_on_list(). An SKB is not on a list if skb->next is NULL. Codify this convention into a helper function and use it where we are dequeueing an SKB and need to mark it as such. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 89283b77294d..c4c9e3f5cd9a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1339,6 +1339,11 @@ static inline void skb_zcopy_abort(struct sk_buff *skb) } } +static inline void skb_mark_not_on_list(struct sk_buff *skb) +{ + skb->next = NULL; +} + /** * skb_queue_empty - check if a queue is empty * @list: queue head -- cgit v1.2.3 From 992cba7e276d438ac8b0a8c17b147b37c8c286f7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jul 2018 15:27:56 -0700 Subject: net: Add and use skb_list_del_init(). It documents what is happening, and eliminates the spurious list pointer poisoning. In the long term, in order to get proper list head debugging, we might want to use the list poison value as the indicator that an SKB is a singleton and not on a list. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c4c9e3f5cd9a..e3a53ca4a9b5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1344,6 +1344,12 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb) skb->next = NULL; } +static inline void skb_list_del_init(struct sk_buff *skb) +{ + __list_del_entry(&skb->list); + skb_mark_not_on_list(skb); +} + /** * skb_queue_empty - check if a queue is empty * @list: queue head -- cgit v1.2.3 From 86c55361e569400b6286f30283a9c143a18c20d9 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 7 Sep 2018 17:22:21 +0300 Subject: net: sched: cls_flower: dump offload count value Change flower in_hw_count type to fixed-size u32 and dump it as TCA_FLOWER_IN_HW_COUNT. This change is necessary to properly test shared blocks and re-offload functionality. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- include/uapi/linux/pkt_cls.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index fdaa5506e6f7..d326fd553b58 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -362,7 +362,7 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) } static inline void -tc_cls_offload_cnt_update(struct tcf_block *block, unsigned int *cnt, +tc_cls_offload_cnt_update(struct tcf_block *block, u32 *cnt, u32 *flags, bool add) { if (add) { diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index be382fb0592d..401d0c1e612d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -483,6 +483,8 @@ enum { TCA_FLOWER_KEY_ENC_OPTS, TCA_FLOWER_KEY_ENC_OPTS_MASK, + TCA_FLOWER_IN_HW_COUNT, + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 67edf21e5adfd336f2ff08668eb09850943666d3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 10 Sep 2018 17:21:42 -0700 Subject: scsi: libcxgbi: fib6_ino reference in rt6_info is rcu protected The fib6_info reference in rt6_info is rcu protected. Add a helper to extract prefsrc from and update cxgbi_check_route6 to use it. Fixes: 0153167aebd0 ("net/ipv6: Remove rt6i_prefsrc") Reported-by: kbuild test robot Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c7496663f99a..f06e968f1992 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -412,6 +412,25 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack); int fib6_del(struct fib6_info *rt, struct nl_info *info); +static inline +void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) +{ + const struct fib6_info *from; + + rcu_read_lock(); + + from = rcu_dereference(rt->from); + if (from) { + *addr = from->fib6_prefsrc.addr; + } else { + struct in6_addr in6_zero = {}; + + *addr = in6_zero; + } + + rcu_read_unlock(); +} + static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) { return f6i->fib6_nh.nh_dev; -- cgit v1.2.3 From 41124fa64d4b298b82266b7ddbefc43540b77b44 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:14 +0200 Subject: net: ethernet: Add helper to remove a supported link mode Some MAC hardware cannot support a subset of link modes. e.g. often 1Gbps Full duplex is supported, but Half duplex is not. Add a helper to remove such a link mode. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index cd6f637cbbfb..9c4c3eca8cf2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1049,6 +1049,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); +void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From af8d9bb2f2f405ad541794b46f9d7bc70f13e5cb Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:15 +0200 Subject: net: ethernet: Add helper for MACs which support asym pause Rather than have the MAC drivers manipulate phydev members to indicate they support Asym Pause, add a helper function. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9c4c3eca8cf2..e2db819807c1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1050,6 +1050,7 @@ int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); +void phy_support_asym_pause(struct phy_device *phydev); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From c306ad36184fb7d0bd53f45441f45c1810e88a53 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:16 +0200 Subject: net: ethernet: Add helper for MACs which support pause Rather than have the MAC drivers manipulate phydev members, add a helper function for MACs supporting Pause, but not Asym Pause. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index e2db819807c1..bc5d6c3f1388 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1050,6 +1050,7 @@ int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); +void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, -- cgit v1.2.3 From 70814e819c1139e5e7faacb3700eab5eac559272 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:17 +0200 Subject: net: ethernet: Add helper for set_pauseparam for Asym Pause ethtool can be used to enable/disable pause. Add a helper to configure the PHY when asym pause is supported. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index bc5d6c3f1388..e4062ba7472f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1052,6 +1052,7 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From 0c122405d4c3ec638ba00865c872ec5a3ed1a6c0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:18 +0200 Subject: net: ethernet: Add helper for set_pauseparam for Pause ethtool can be used to enable/disable pause. Add a helper to configure the PHY when Pause is supported. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index e4062ba7472f..8521391ebb20 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1052,6 +1052,8 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, + bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, -- cgit v1.2.3 From 22b7d29926b577ff4f480611380d03268545b787 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:19 +0200 Subject: net: ethernet: Add helper to determine if pause configuration is supported Rather than have MAC drivers open code the test, add a helper in phylib. This will help when we change the type of phydev->supported. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8521391ebb20..192a1fa0c73b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1055,6 +1055,8 @@ void phy_support_asym_pause(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); +bool phy_validate_pause(struct phy_device *phydev, + struct ethtool_pauseparam *pp); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From 435f2e7cc0b783615d7fbcf08f5f00d289f9caeb Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 11 Sep 2018 09:39:53 +0300 Subject: net: bridge: add support for sticky fdb entries Add support for entries which are "sticky", i.e. will not change their port if they show up from a different one. A new ndm flag is introduced for that purpose - NTF_STICKY. We allow to set it only to non-local entries. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 904db6148476..998155444e0d 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -43,6 +43,7 @@ enum { #define NTF_PROXY 0x08 /* == ATF_PUBL */ #define NTF_EXT_LEARNED 0x10 #define NTF_OFFLOADED 0x20 +#define NTF_STICKY 0x40 #define NTF_ROUTER 0x80 /* -- cgit v1.2.3 From 52d0d404d39dd9eac71a181615d6ca15e23d8e38 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 12 Sep 2018 10:04:21 +0800 Subject: geneve: add ttl inherit support Similar with commit 72f6d71e491e6 ("vxlan: add ttl inherit support"), currently ttl == 0 means "use whatever default value" on geneve instead of inherit inner ttl. To respect compatibility with old behavior, let's add a new IFLA_GENEVE_TTL_INHERIT for geneve ttl inherit support. Reported-by: Jianlin Shi Suggested-by: Jiri Benc Signed-off-by: Hangbin Liu Reviewed-by: Jiri Benc Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 29d49b989acd..58faab897201 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -555,6 +555,7 @@ enum { IFLA_GENEVE_UDP_ZERO_CSUM6_TX, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, IFLA_GENEVE_LABEL, + IFLA_GENEVE_TTL_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From 7969119293f5aa3b51040ae81a80e87c7b979b2d Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 9 Sep 2018 22:16:43 +0200 Subject: net: dsa: Add Lantiq / Intel GSWIP tag support This handles the tag added by the PMAC on the VRX200 SoC line. The GSWIP uses internally a GSWIP special tag which is located after the Ethernet header. The PMAC which connects the GSWIP to the CPU converts this special tag used by the GSWIP into the PMAC special tag which is added in front of the Ethernet header. This was tested with GSWIP 2.1 found in the VRX200 SoCs, other GSWIP versions use slightly different PMAC special tags. Signed-off-by: Hauke Mehrtens Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 461e8a7661b7..23690c44e167 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -35,6 +35,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_BRCM_PREPEND, DSA_TAG_PROTO_DSA, DSA_TAG_PROTO_EDSA, + DSA_TAG_PROTO_GSWIP, DSA_TAG_PROTO_KSZ, DSA_TAG_PROTO_LAN9303, DSA_TAG_PROTO_MTK, -- cgit v1.2.3 From 15033f0457dca569b284bef0c8d3ad55fb37eacb Mon Sep 17 00:00:00 2001 From: Andre Naujoks Date: Mon, 10 Sep 2018 10:27:15 +0200 Subject: ipv6: Add sockopt IPV6_MULTICAST_ALL analogue to IP_MULTICAST_ALL The socket option will be enabled by default to ensure current behaviour is not changed. This is the same for the IPv4 version. A socket bound to in6addr_any and a specific port will receive all traffic on that port. Analogue to IP_MULTICAST_ALL, disable this behaviour, if one or more multicast groups were joined (using said socket) and only pass on multicast traffic from groups, which were explicitly joined via this socket. Without this option disabled a socket (system even) joined to multiple multicast groups is very hard to get right. Filtering by destination address has to take place in user space to avoid receiving multicast traffic from other multicast groups, which might have traffic on the same port. The extension of the IP_MULTICAST_ALL socketoption to just apply to ipv6, too, is not done to avoid changing the behaviour of current applications. Signed-off-by: Andre Naujoks Acked-By: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++- include/uapi/linux/in6.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8415bf1a9776..495e834c1367 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -274,7 +274,8 @@ struct ipv6_pinfo { */ dontfrag:1, autoflowlabel:1, - autoflowlabel_set:1; + autoflowlabel_set:1, + mc_all:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index ed291e55f024..71d82fe15b03 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -177,6 +177,7 @@ struct in6_flowlabel_req { #define IPV6_V6ONLY 26 #define IPV6_JOIN_ANYCAST 27 #define IPV6_LEAVE_ANYCAST 28 +#define IPV6_MULTICAST_ALL 29 /* IPV6_MTU_DISCOVER values */ #define IPV6_PMTUDISC_DONT 0 -- cgit v1.2.3 From 9708d2b5b7c648e8e0a40d11e8cea12f6277f33c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Sep 2018 11:42:06 -0700 Subject: llc: avoid blocking in llc_sap_close() llc_sap_close() is called by llc_sap_put() which could be called in BH context in llc_rcv(). We can't block in BH. There is no reason to block it here, kfree_rcu() should be sufficient. Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/llc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/llc.h b/include/net/llc.h index 890a87318014..df282d9b4017 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -66,6 +66,7 @@ struct llc_sap { int sk_count; struct hlist_nulls_head sk_laddr_hash[LLC_SK_LADDR_HASH_ENTRIES]; struct hlist_head sk_dev_hash[LLC_SK_DEV_HASH_ENTRIES]; + struct rcu_head rcu; }; static inline -- cgit v1.2.3 From e4a2a3048ed93f0c354ad837f1d45fc8d389d538 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 12 Sep 2018 11:16:59 +0800 Subject: net: sock: introduce SOCK_XDP This patch introduces a new sock flag - SOCK_XDP. This will be used for notifying the upper layer that XDP program is attached on the lower socket, and requires for extra headroom. TUN will be the first user. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 433f45fc2d68..38cae35f6e16 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -800,6 +800,7 @@ enum sock_flags { SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ SOCK_TXTIME, + SOCK_XDP, /* XDP is attached */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) -- cgit v1.2.3 From fe8dd45bb7556246c6b76277b1ba4296c91c2505 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 12 Sep 2018 11:17:06 +0800 Subject: tun: switch to new type of msg_control This patch introduces to a new tun/tap specific msg_control: #define TUN_MSG_UBUF 1 #define TUN_MSG_PTR 2 struct tun_msg_ctl { int type; void *ptr; }; This allows us to pass different kinds of msg_control through sendmsg(). The first supported type is ubuf (TUN_MSG_UBUF) which will be used by the existed vhost_net zerocopy code. The second is XDP buff, which allows vhost_net to pass XDP buff to TUN. This could be used to implement accepting an array of XDP buffs from vhost_net in the following patches. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/if_tun.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 3d2996dc7d85..12e3eebf0ce6 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -16,9 +16,23 @@ #define __IF_TUN_H #include +#include #define TUN_XDP_FLAG 0x1UL +#define TUN_MSG_UBUF 1 +#define TUN_MSG_PTR 2 +struct tun_msg_ctl { + unsigned short type; + unsigned short num; + void *ptr; +}; + +struct tun_xdp_hdr { + int buflen; + struct virtio_net_hdr gso; +}; + #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); -- cgit v1.2.3 From 293681f149a8dc4c9df2c09b2c4e873d474be5d4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 13 Sep 2018 21:32:23 +0800 Subject: vxlan: Remove duplicated include from vxlan.h Remove duplicated include. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/net/vxlan.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b99a02ae3934..7ef15179f263 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -5,7 +5,6 @@ #include #include #include -#include /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -- cgit v1.2.3 From 52bb6677d530d37055092d86b4eab69dce6c166a Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 14 Sep 2018 16:00:51 +0800 Subject: net: move definition of pcpu_lstats to header file pcpu_lstats is defined in several files, so unify them as one and move to header file Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e2b3bd750c98..baed5d5088c5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2382,6 +2382,12 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; +struct pcpu_lstats { + u64 packets; + u64 bytes; + struct u64_stats_sync syncp; +}; + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ -- cgit v1.2.3 From d58e468b1112dcd1d5193c0a89ff9f98b5a3e8b9 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 14 Sep 2018 07:46:18 -0700 Subject: flow_dissector: implements flow dissector BPF hook Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector path. The BPF program is per-network namespace. Signed-off-by: Petar Penkov Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/linux/bpf_types.h | 1 + include/linux/skbuff.h | 7 +++++++ include/net/net_namespace.h | 3 +++ include/net/sch_generic.h | 12 +++++++++--- include/uapi/linux/bpf.h | 26 ++++++++++++++++++++++++++ 6 files changed, 47 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 523481a3471b..988a00797bcd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -212,6 +212,7 @@ enum bpf_reg_type { PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ + PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ }; /* The information passed from prog-specific *_is_valid_access diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index cd26c090e7c0..22083712dd18 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) #ifdef CONFIG_INET BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) #endif +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 17a13e4785fc..ce0e863f02a2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -243,6 +243,8 @@ struct scatterlist; struct pipe_inode_info; struct iov_iter; struct napi_struct; +struct bpf_prog; +union bpf_attr; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); +int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog); + +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); + bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 9b5fdc50519a..99d4148e0f90 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -43,6 +43,7 @@ struct ctl_table_header; struct net_generic; struct uevent_sock; struct netns_ipvs; +struct bpf_prog; #define NETDEV_HASHBITS 8 @@ -145,6 +146,8 @@ struct net { #endif struct net_generic __rcu *gen; + struct bpf_prog __rcu *flow_dissector_prog; + /* Note : following structs are cache line aligned */ #ifdef CONFIG_XFRM struct netns_xfrm xfrm; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a6d00093f35e..1b81ba85fd2d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -19,6 +19,7 @@ struct Qdisc_ops; struct qdisc_walker; struct tcf_walker; struct module; +struct bpf_flow_keys; typedef int tc_setup_cb_t(enum tc_setup_type type, void *type_data, void *cb_priv); @@ -307,9 +308,14 @@ struct tcf_proto { }; struct qdisc_skb_cb { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; + union { + struct { + unsigned int pkt_len; + u16 slave_dev_queue_mapping; + u16 tc_classid; + }; + struct bpf_flow_keys *flow_keys; + }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 66917a4eba27..aa5ccd2385ed 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -152,6 +152,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_SEG6LOCAL, BPF_PROG_TYPE_LIRC_MODE2, BPF_PROG_TYPE_SK_REUSEPORT, + BPF_PROG_TYPE_FLOW_DISSECTOR, }; enum bpf_attach_type { @@ -172,6 +173,7 @@ enum bpf_attach_type { BPF_CGROUP_UDP4_SENDMSG, BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, + BPF_FLOW_DISSECTOR, __MAX_BPF_ATTACH_TYPE }; @@ -2333,6 +2335,7 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; + struct bpf_flow_keys *flow_keys; }; struct bpf_tunnel_key { @@ -2778,4 +2781,27 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +struct bpf_flow_keys { + __u16 nhoff; + __u16 thoff; + __u16 addr_proto; /* ETH_P_* of valid addrs */ + __u8 is_frag; + __u8 is_first_frag; + __u8 is_encap; + __u8 ip_proto; + __be16 n_proto; + __be16 sport; + __be16 dport; + union { + struct { + __be32 ipv4_src; + __be32 ipv4_dst; + }; + struct { + __u32 ipv6_src[4]; /* in6_addr; network order */ + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + }; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 02b408fae3d5552d10d1189fc0bd7e5b1e76af71 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 29 Aug 2018 00:19:00 +0200 Subject: netfilter: nf_tables: rt: allow checking if dst has xfrm attached Useful e.g. to avoid NATting inner headers of to-be-encrypted packets. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e23290ffdc77..6c44cbbb2cda 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -826,12 +826,14 @@ enum nft_meta_keys { * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 * @NFT_RT_TCPMSS: fetch current path tcp mss + * @NFT_RT_XFRM: boolean, skb->dst->xfrm != NULL */ enum nft_rt_keys { NFT_RT_CLASSID, NFT_RT_NEXTHOP4, NFT_RT_NEXTHOP6, NFT_RT_TCPMSS, + NFT_RT_XFRM, __NFT_RT_MAX }; #define NFT_RT_MAX (__NFT_RT_MAX - 1) -- cgit v1.2.3 From cd5125d8f51882279f50506bb9c7e5e89dc9bef3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 29 Aug 2018 14:41:30 +0200 Subject: netfilter: nf_tables: split set destruction in deactivate and destroy phase Splits unbind_set into destroy_set and unbinding operation. Unbinding removes set from lists (so new transaction would not find it anymore) but keeps memory allocated (so packet path continues to work). Rebind function is added to allow unrolling in case transaction that wants to remove set is aborted. Destroy function is added to free the memory, but this could occur outside of transaction in the future. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0f39ac487012..2c33958f3e7a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -470,6 +470,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); +void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); +void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set); /** * enum nft_set_extensions - set extension type IDs @@ -724,7 +727,9 @@ struct nft_expr_type { * @eval: Expression evaluation function * @size: full expression size, including private data size * @init: initialization function - * @destroy: destruction function + * @activate: activate expression in the next generation + * @deactivate: deactivate expression in next generation + * @destroy: destruction function, called after synchronize_rcu * @dump: function to dump parameters * @type: expression type * @validate: validate expression, called during loop detection -- cgit v1.2.3 From 0935d558840099b3679c67bb7468dc78fcbad940 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 29 Aug 2018 14:41:32 +0200 Subject: netfilter: nf_tables: asynchronous release Release the committed transaction log from a work queue, moving expensive synchronize_rcu out of the locked section and providing opportunity to batch this. On my test machine this cuts runtime of nft-test.py in half. Based on earlier patch from Pablo Neira Ayuso. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2c33958f3e7a..841835a387e1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1298,12 +1298,14 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) * * @list: used internally * @msg_type: message type + * @put_net: ctx->net needs to be put * @ctx: transaction context * @data: internal information related to the transaction */ struct nft_trans { struct list_head list; int msg_type; + bool put_net; struct nft_ctx ctx; char data[0]; }; -- cgit v1.2.3 From 2953d80ff04862b26a2e628fb3948868f54d753d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 31 Aug 2018 20:29:37 +0200 Subject: netfilter: remove obsolete need_conntrack stub as of a0ae2562c6c4b27 ("netfilter: conntrack: remove l3proto abstraction") there are no users anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 03097fa70975..e142b2b5f1ea 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -19,7 +19,4 @@ struct ip_conntrack_stat { unsigned int search_restart; }; -/* call to create an explicit dependency on nf_conntrack. */ -void need_conntrack(void); - #endif /* _NF_CONNTRACK_COMMON_H */ -- cgit v1.2.3 From 6c47260250fc6114ce2012db13e1cd3938a27b73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Sep 2018 18:09:40 +0200 Subject: netfilter: nf_tables: add xfrm expression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit supports fetching saddr/daddr of tunnel mode states, request id and spi. If direction is 'in', use inbound skb secpath, else dst->xfrm. Joint work with Máté Eckl. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6c44cbbb2cda..702e4f0bec56 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1514,6 +1514,35 @@ enum nft_devices_attributes { }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) +/* + * enum nft_xfrm_attributes - nf_tables xfrm expr netlink attributes + * + * @NFTA_XFRM_DREG: destination register (NLA_U32) + * @NFTA_XFRM_KEY: enum nft_xfrm_keys (NLA_U32) + * @NFTA_XFRM_DIR: direction (NLA_U8) + * @NFTA_XFRM_SPNUM: index in secpath array (NLA_U32) + */ +enum nft_xfrm_attributes { + NFTA_XFRM_UNSPEC, + NFTA_XFRM_DREG, + NFTA_XFRM_KEY, + NFTA_XFRM_DIR, + NFTA_XFRM_SPNUM, + __NFTA_XFRM_MAX +}; +#define NFTA_XFRM_MAX (__NFTA_XFRM_MAX - 1) + +enum nft_xfrm_keys { + NFT_XFRM_KEY_UNSPEC, + NFT_XFRM_KEY_DADDR_IP4, + NFT_XFRM_KEY_DADDR_IP6, + NFT_XFRM_KEY_SADDR_IP4, + NFT_XFRM_KEY_SADDR_IP6, + NFT_XFRM_KEY_REQID, + NFT_XFRM_KEY_SPI, + __NFT_XFRM_KEY_MAX, +}; +#define NFT_XFRM_KEY_MAX (__NFT_XFRM_KEY_MAX - 1) /** * enum nft_trace_attributes - nf_tables trace netlink attributes -- cgit v1.2.3 From 0d704967f4a49cc2212350b3e4a8231f8b4283ed Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 4 Sep 2018 12:07:55 +0200 Subject: netfilter: xt_cgroup: shrink size of v2 path cgroup v2 path field is PATH_MAX which is too large, this is placing too much pressure on memory allocation for people with many rules doing cgroup v1 classid matching, side effects of this are bug reports like: https://bugzilla.kernel.org/show_bug.cgi?id=200639 This patch registers a new revision that shrinks the cgroup path to 512 bytes, which is the same approach we follow in similar extensions that have a path field. Cc: Tejun Heo Signed-off-by: Pablo Neira Ayuso Acked-by: Tejun Heo --- include/uapi/linux/netfilter/xt_cgroup.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h index e96dfa1b34f7..b74e370d6133 100644 --- a/include/uapi/linux/netfilter/xt_cgroup.h +++ b/include/uapi/linux/netfilter/xt_cgroup.h @@ -22,4 +22,20 @@ struct xt_cgroup_info_v1 { void *priv __attribute__((aligned(8))); }; +#define XT_CGROUP_PATH_MAX 512 + +struct xt_cgroup_info_v2 { + __u8 has_path; + __u8 has_classid; + __u8 invert_path; + __u8 invert_classid; + union { + char path[XT_CGROUP_PATH_MAX]; + __u32 classid; + }; + + /* kernel internal data */ + void *priv __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_CGROUP_H */ -- cgit v1.2.3 From 7a3dd8c8979ce48b99cb0e9b7435a97f0716138a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 14 Sep 2018 13:01:46 -0700 Subject: tls: async support causes out-of-bounds access in crypto APIs When async support was added it needed to access the sk from the async callback to report errors up the stack. The patch tried to use space after the aead request struct by directly setting the reqsize field in aead_request. This is an internal field that should not be used outside the crypto APIs. It is used by the crypto code to define extra space for private structures used in the crypto context. Users of the API then use crypto_aead_reqsize() and add the returned amount of bytes to the end of the request memory allocation before posting the request to encrypt/decrypt APIs. So this breaks (with general protection fault and KASAN error, if enabled) because the request sent to decrypt is shorter than required causing the crypto API out-of-bounds errors. Also it seems unlikely the sk is even valid by the time it gets to the callback because of memset in crypto layer. Anyways, fix this by holding the sk in the skb->sk field when the callback is set up and because the skb is already passed through to the callback handler via void* we can access it in the handler. Then in the handler we need to be careful to NULL the pointer again before kfree_skb. I added comments on both the setup (in tls_do_decryption) and when we clear it from the crypto callback handler tls_decrypt_done(). After this selftests pass again and fixes KASAN errors/warnings. Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records") Signed-off-by: John Fastabend Reviewed-by: Vakul Garg Signed-off-by: David S. Miller --- include/net/tls.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index cd0a65bd92f9..8630d28bd951 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -128,10 +128,6 @@ struct tls_sw_context_rx { bool async_notify; }; -struct decrypt_req_ctx { - struct sock *sk; -}; - struct tls_record_info { struct list_head list; u32 end_seq; -- cgit v1.2.3 From 568b742a9d9888aca876b6ad9fa45490f18bee0a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Sep 2018 11:57:28 +0200 Subject: netlink: add NLA_REJECT policy type In some situations some netlink attributes may be used for output only (kernel->userspace) or may be reserved for future use. It's then helpful to be able to prevent userspace from using them in messages sent to the kernel, since they'd otherwise be ignored and any future will become impossible if this happens. Add NLA_REJECT to the policy which does nothing but reject (with EINVAL) validation of any messages containing this attribute. Allow for returning a specific extended ACK error message in the validation_data pointer. While at it clear up the documentation a bit - the NLA_BITFIELD32 documentation was added to the list of len field descriptions. Also, use NL_SET_BAD_ATTR() in one place where it's open-coded. The specific case I have in mind now is a shared nested attribute containing request/response data, and it would be pointless and potentially confusing to have userspace include response data in the messages that actually contain a request. Signed-off-by: Johannes Berg Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/netlink.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 0c154f98e987..b318b0a9f6c3 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -180,6 +180,7 @@ enum { NLA_S32, NLA_S64, NLA_BITFIELD32, + NLA_REJECT, __NLA_TYPE_MAX, }; @@ -208,9 +209,19 @@ enum { * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" - * NLA_BITFIELD32 A 32-bit bitmap/bitselector attribute + * NLA_BITFIELD32 Unused + * NLA_REJECT Unused * All other Minimum length of attribute payload * + * Meaning of `validation_data' field: + * NLA_BITFIELD32 This is a 32-bit bitmap/bitselector attribute and + * validation data must point to a u32 value of valid + * flags + * NLA_REJECT This attribute is always rejected and validation data + * may point to a string to report as the error instead + * of the generic one in extended ACK. + * All other Unused + * * Example: * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, -- cgit v1.2.3 From b60b87fc2996240e298529a46e122ef62ef9c27f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Sep 2018 11:57:29 +0200 Subject: netlink: add ethernet address policy types Commonly, ethernet addresses are just using a policy of { .len = ETH_ALEN } which leaves userspace free to send more data than it should, which may hide bugs. Introduce NLA_EXACT_LEN which checks for exact size, rejecting the attribute if it's not exactly that length. Also add NLA_EXACT_LEN_WARN which requires the minimum length and will warn on longer attributes, for backward compatibility. Use these to define NLA_POLICY_ETH_ADDR (new strict policy) and NLA_POLICY_ETH_ADDR_COMPAT (compatible policy with warning); these are used like this: static const struct nla_policy [...] = { [NL_ATTR_NAME] = NLA_POLICY_ETH_ADDR, ... }; Signed-off-by: Johannes Berg Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/netlink.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index b318b0a9f6c3..318b1ded3833 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -181,6 +181,8 @@ enum { NLA_S64, NLA_BITFIELD32, NLA_REJECT, + NLA_EXACT_LEN, + NLA_EXACT_LEN_WARN, __NLA_TYPE_MAX, }; @@ -211,6 +213,10 @@ enum { * just like "All other" * NLA_BITFIELD32 Unused * NLA_REJECT Unused + * NLA_EXACT_LEN Attribute must have exactly this length, otherwise + * it is rejected. + * NLA_EXACT_LEN_WARN Attribute should have exactly this length, a warning + * is logged if it is longer, shorter is rejected. * All other Minimum length of attribute payload * * Meaning of `validation_data' field: @@ -236,6 +242,13 @@ struct nla_policy { void *validation_data; }; +#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len } +#define NLA_POLICY_EXACT_LEN_WARN(_len) { .type = NLA_EXACT_LEN_WARN, \ + .len = _len } + +#define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) +#define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) + /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request -- cgit v1.2.3 From 14d73416792afa84f6a7245ee474d2432069da56 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 17 Sep 2018 18:46:55 +0800 Subject: veth: rename pcpu_vstats as pcpu_lstats struct pcpu_vstats and pcpu_lstats have same members and usage, and pcpu_lstats is used in many files, so rename pcpu_vstats as pcpu_lstats to reduce duplicate definition Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index baed5d5088c5..1cbbf77a685f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2000,7 +2000,6 @@ struct net_device { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; - struct pcpu_vstats __percpu *vstats; }; #if IS_ENABLED(CONFIG_GARP) -- cgit v1.2.3 From 2dfd184abd38fd72d80715fa8b00c9de78490200 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 18 Sep 2018 16:20:18 -0400 Subject: flow_dissector: fix build failure without CONFIG_NET If boolean CONFIG_BPF_SYSCALL is enabled, kernel/bpf/syscall.c will call flow_dissector functions from net/core/flow_dissector.c. This causes this build failure if CONFIG_NET is disabled: kernel/bpf/syscall.o: In function `__x64_sys_bpf': syscall.c:(.text+0x3278): undefined reference to `skb_flow_dissector_bpf_prog_attach' syscall.c:(.text+0x3310): undefined reference to `skb_flow_dissector_bpf_prog_detach' kernel/bpf/syscall.o:(.rodata+0x3f0): undefined reference to `flow_dissector_prog_ops' kernel/bpf/verifier.o:(.rodata+0x250): undefined reference to `flow_dissector_verifier_ops' Analogous to other optional BPF program types in syscall.c, add stubs if the relevant functions are not compiled and move the BPF_PROG_TYPE definition in the #ifdef CONFIG_NET block. Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Reported-by: Randy Dunlap Signed-off-by: Willem de Bruijn Acked-by: Randy Dunlap # build-tested Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/linux/bpf_types.h | 2 +- include/linux/skbuff.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 22083712dd18..c9bd6fb765b0 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -16,6 +16,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local) BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) #endif #ifdef CONFIG_BPF_EVENTS BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) @@ -32,7 +33,6 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) #ifdef CONFIG_INET BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) #endif -BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ce0e863f02a2..76be85ea392a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1194,10 +1194,23 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); +#ifdef CONFIG_NET int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); +#else +static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} +#endif bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, -- cgit v1.2.3 From 93e66024b0249cec81e91328c55a754efd3192e0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 12 Sep 2018 15:19:07 +0200 Subject: netfilter: conntrack: pass nf_hook_state to packet and error handlers nf_hook_state contains all the hook meta-information: netns, protocol family, hook location, and so on. Instead of only passing selected information, pass a pointer to entire structure. This will allow to merge the error and the packet handlers and remove the ->new() function in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 3 +-- include/net/netfilter/nf_conntrack_l4proto.h | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 2a3e0974a6af..afc9b3620473 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -20,8 +20,7 @@ /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ -unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, - struct sk_buff *skb); +unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state); int nf_conntrack_init_net(struct net *net); void nf_conntrack_cleanup_net(struct net *net); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 8465263b297d..a857a0adfb31 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -45,7 +45,8 @@ struct nf_conntrack_l4proto { int (*packet)(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info ctinfo); + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); /* Called when a new connection for this protocol found; * returns TRUE if it's OK. If so, packet() called next. */ @@ -55,9 +56,9 @@ struct nf_conntrack_l4proto { /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + int (*error)(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, - u_int8_t pf, unsigned int hooknum); + const struct nf_hook_state *state); /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); -- cgit v1.2.3 From 9976fc6e6edbb0372f084a2ae8c1b8103b3bff1d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 12 Sep 2018 15:19:08 +0200 Subject: netfilter: conntrack: remove the l4proto->new() function ->new() gets invoked after ->error() and before ->packet() if a conntrack lookup has found no result for the tuple. We can fold it into ->packet() -- the packet() implementations can check if the conntrack is confirmed (new) or not (already in hash). If its unconfirmed, the conntrack isn't in the hash yet so current skb created a new conntrack entry. Only relevant side effect -- if packet() doesn't return NF_ACCEPT but -NF_ACCEPT (or drop), while the conntrack was just created, then the newly allocated conntrack is freed right away, rather than not created in the first place. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index a857a0adfb31..016958e67fcc 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -48,11 +48,6 @@ struct nf_conntrack_l4proto { enum ip_conntrack_info ctinfo, const struct nf_hook_state *state); - /* Called when a new connection for this protocol found; - * returns TRUE if it's OK. If so, packet() called next. */ - bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff); - /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); -- cgit v1.2.3 From 83d213fd9d1a56108584cd812333462caa39a747 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 12 Sep 2018 15:19:09 +0200 Subject: netfilter: conntrack: deconstify packet callback skb pointer Only two protocols need the ->error() function: icmp and icmpv6. This is because icmp error mssages might be RELATED to an existing connection (e.g. PMTUD, port unreachable and the like), and their ->error() handlers do this. The error callback is already optional, so remove it for udp and call them from ->packet() instead. As the error() callback can call checksum functions that write to skb->csum*, the const qualifier has to be removed as well. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 016958e67fcc..39f0c84f71b9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -43,7 +43,7 @@ struct nf_conntrack_l4proto { /* Returns verdict for packet, or -1 for invalid. */ int (*packet)(struct nf_conn *ct, - const struct sk_buff *skb, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state); -- cgit v1.2.3 From 6fe78fa484a5dad030b24e33e0cedc5d5bbd0fde Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 12 Sep 2018 15:19:11 +0200 Subject: netfilter: conntrack: remove error callback and handle icmp from core icmp(v6) are the only two layer four protocols that need the error() callback (to handle icmp errors that are related to an established connections, e.g. packet too big, port unreachable and the like). Remove the error callback and handle these two special cases from the core. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 39f0c84f71b9..7fdb4b95bba4 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -51,10 +51,6 @@ struct nf_conntrack_l4proto { /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int dataoff, - const struct nf_hook_state *state); - /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); @@ -97,6 +93,15 @@ struct nf_conntrack_l4proto { struct module *me; }; +int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state); + +int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state); /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; -- cgit v1.2.3 From ca2ca6e1c04e64413f5fb9a5d54fb8b0bdd86467 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 12 Sep 2018 15:19:12 +0200 Subject: netfilter: conntrack: remove unused proto arg from netns init functions Its unused, next patch will remove l4proto->l3proto number to simplify l4 protocol demuxer lookup. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7fdb4b95bba4..420823a8648f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -84,7 +84,7 @@ struct nf_conntrack_l4proto { #endif unsigned int *net_id; /* Init l4proto pernet data */ - int (*init_net)(struct net *net, u_int16_t proto); + int (*init_net)(struct net *net); /* Return the per-net protocol part. */ struct nf_proto_net *(*get_net_proto)(struct net *net); -- cgit v1.2.3 From dd2934a95701576203b2f61e8ded4e4a2f9183ea Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Sep 2018 12:02:54 +0200 Subject: netfilter: conntrack: remove l3->l4 mapping information l4 protocols are demuxed by l3num, l4num pair. However, almost all l4 trackers are l3 agnostic. Only exceptions are: - gre, icmp (ipv4 only) - icmpv6 (ipv6 only) This commit gets rid of the l3 mapping, l4 trackers can now be looked up by their IPPROTO_XXX value alone, which gets rid of the additional l3 indirection. For icmp, ipcmp6 and gre, add a check on state->pf and return -NF_ACCEPT in case we're asked to track e.g. icmpv6-in-ipv4, this seems more fitting than using the generic tracker. Additionally we can kill the 2nd l4proto definitions that were needed for v4/v6 split -- they are now the same so we can use single l4proto struct for each protocol, rather than two. The EXPORT_SYMBOLs can be removed as all these object files are part of nf_conntrack with no external references. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 13 +++++-------- include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 13 ------------- include/net/netfilter/nf_conntrack_l4proto.h | 9 ++------- 3 files changed, 7 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index c84b51682f08..135ee702c7b0 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -10,20 +10,17 @@ #ifndef _NF_CONNTRACK_IPV4_H #define _NF_CONNTRACK_IPV4_H -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp; extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp; #endif #ifdef CONFIG_NF_CT_PROTO_SCTP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; #endif -int nf_conntrack_ipv4_compat_init(void); -void nf_conntrack_ipv4_compat_fini(void); - #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index effa8dfba68c..7b3c873f8839 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -2,20 +2,7 @@ #ifndef _NF_CONNTRACK_IPV6_H #define _NF_CONNTRACK_IPV6_H -extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; - -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; -#ifdef CONFIG_NF_CT_PROTO_DCCP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; -#endif -#ifdef CONFIG_NF_CT_PROTO_SCTP -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; -#endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; -#endif #include extern struct ctl_table nf_ct_ipv6_sysctl_table[]; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 420823a8648f..d838a93430a1 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -18,9 +18,6 @@ struct seq_file; struct nf_conntrack_l4proto { - /* L3 Protocol number. */ - u_int16_t l3proto; - /* L4 Protocol number. */ u_int8_t l4proto; @@ -107,11 +104,9 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 -const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, - u_int8_t l4proto); +const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto); -const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, - u_int8_t l4proto); +const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto); void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ -- cgit v1.2.3 From 93185c80a5f748620f5652e492f2a1c8d89db593 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 12 Sep 2018 15:19:14 +0200 Subject: netfilter: conntrack: clamp l4proto array size at largers supported protocol All higher l4proto numbers are handled by the generic tracker; the l4proto lookup function already returns generic one in case the l4proto number exceeds max size. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d838a93430a1..eed04af9b75e 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -102,7 +102,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; -#define MAX_NF_CT_PROTO 256 +#define MAX_NF_CT_PROTO IPPROTO_UDPLITE const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto); -- cgit v1.2.3 From 78f2756c5fc0bf17560766dbc5aaa1e4a7ba66e4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 20 Sep 2018 13:50:47 -0700 Subject: net/ipv4: Move device validation to helper Move the device matching check in __fib_validate_source to a helper and export it for use by netfilter modules. Code move only; no functional change intended. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 69c91d1934c1..f7c109e37298 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -373,6 +373,7 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); __be32 fib_compute_spec_dst(struct sk_buff *skb); +bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); -- cgit v1.2.3 From 5d773ff41a7cdf0ef6cc6647435d59f0cf53e7b1 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 17 Sep 2018 13:30:46 +0300 Subject: net/mlx5: Rename incorrect naming in IFC file Remove a trailing underscore from the multicast/unicast names. Signed-off-by: Mark Bloch Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3a4a2e0567e9..4c7a1d25d73b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2559,8 +2559,8 @@ enum { }; enum { - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ = 0x1, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_ = 0x2, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST = 0x1, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST = 0x2, }; struct mlx5_ifc_tirc_bits { -- cgit v1.2.3 From a42055e8d2c30d4decfc13ce943d09c7b9dad221 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Fri, 21 Sep 2018 09:46:13 +0530 Subject: net/tls: Add support for async encryption of records for performance In current implementation, tls records are encrypted & transmitted serially. Till the time the previously submitted user data is encrypted, the implementation waits and on finish starts transmitting the record. This approach of encrypt-one record at a time is inefficient when asynchronous crypto accelerators are used. For each record, there are overheads of interrupts, driver softIRQ scheduling etc. Also the crypto accelerator sits idle most of time while an encrypted record's pages are handed over to tcp stack for transmission. This patch enables encryption of multiple records in parallel when an async capable crypto accelerator is present in system. This is achieved by allowing the user space application to send more data using sendmsg() even while previously issued data is being processed by crypto accelerator. This requires returning the control back to user space application after submitting encryption request to accelerator. This also means that zero-copy mode of encryption cannot be used with async accelerator as we must be done with user space application buffer before returning from sendmsg(). There can be multiple records in flight to/from the accelerator. Each of the record is represented by 'struct tls_rec'. This is used to store the memory pages for the record. After the records are encrypted, they are added in a linked list called tx_ready_list which contains encrypted tls records sorted as per tls sequence number. The records from tx_ready_list are transmitted using a newly introduced function called tls_tx_records(). The tx_ready_list is polled for any record ready to be transmitted in sendmsg(), sendpage() after initiating encryption of new tls records. This achieves parallel encryption and transmission of records when async accelerator is present. There could be situation when crypto accelerator completes encryption later than polling of tx_ready_list by sendmsg()/sendpage(). Therefore we need a deferred work context to be able to transmit records from tx_ready_list. The deferred work context gets scheduled if applications are not sending much data through the socket. If the applications issue sendmsg()/sendpage() in quick succession, then the scheduling of tx_work_handler gets cancelled as the tx_ready_list would be polled from application's context itself. This saves scheduling overhead of deferred work. The patch also brings some side benefit. We are able to get rid of the concept of CLOSED record. This is because the records once closed are either encrypted and then placed into tx_ready_list or if encryption fails, the socket error is set. This simplifies the kernel tls sendpath. However since tls_device.c is still using macros, accessory functions for CLOSED records have been retained. Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- include/net/tls.h | 70 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 9f3c4ea9ad6f..3aa73e2d8823 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -41,7 +41,7 @@ #include #include #include - +#include #include @@ -93,24 +93,47 @@ enum { TLS_NUM_CONFIG, }; -struct tls_sw_context_tx { - struct crypto_aead *aead_send; - struct crypto_wait async_wait; - - char aad_space[TLS_AAD_SPACE_SIZE]; - - unsigned int sg_plaintext_size; - int sg_plaintext_num_elem; +/* TLS records are maintained in 'struct tls_rec'. It stores the memory pages + * allocated or mapped for each TLS record. After encryption, the records are + * stores in a linked list. + */ +struct tls_rec { + struct list_head list; + int tx_flags; struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS]; - - unsigned int sg_encrypted_size; - int sg_encrypted_num_elem; struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS]; /* AAD | sg_plaintext_data | sg_tag */ struct scatterlist sg_aead_in[2]; /* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */ struct scatterlist sg_aead_out[2]; + + unsigned int sg_plaintext_size; + unsigned int sg_encrypted_size; + int sg_plaintext_num_elem; + int sg_encrypted_num_elem; + + char aad_space[TLS_AAD_SPACE_SIZE]; + struct aead_request aead_req; + u8 aead_req_ctx[]; +}; + +struct tx_work { + struct delayed_work work; + struct sock *sk; +}; + +struct tls_sw_context_tx { + struct crypto_aead *aead_send; + struct crypto_wait async_wait; + struct tx_work tx_work; + struct tls_rec *open_rec; + struct list_head tx_ready_list; + atomic_t encrypt_pending; + int async_notify; + +#define BIT_TX_SCHEDULED 0 + unsigned long tx_bitmask; }; struct tls_sw_context_rx { @@ -197,6 +220,8 @@ struct tls_context { struct scatterlist *partially_sent_record; u16 partially_sent_offset; + u64 tx_seq_number; /* Next TLS seqnum to be transmitted */ + unsigned long flags; bool in_tcp_sendpages; @@ -261,6 +286,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, void tls_device_sk_destruct(struct sock *sk); void tls_device_init(void); void tls_device_cleanup(void); +int tls_tx_records(struct sock *sk, int flags); struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn); @@ -279,6 +305,9 @@ void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); int tls_push_sg(struct sock *sk, struct tls_context *ctx, struct scatterlist *sg, u16 first_offset, int flags); +int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, + int flags); + int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, int flags, long *timeo); @@ -312,6 +341,23 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) return tls_ctx->pending_open_record_frags; } +static inline bool is_tx_ready(struct tls_context *tls_ctx, + struct tls_sw_context_tx *ctx) +{ + struct tls_rec *rec; + u64 seq; + + rec = list_first_entry(&ctx->tx_ready_list, struct tls_rec, list); + if (!rec) + return false; + + seq = be64_to_cpup((const __be64 *)&rec->aad_space); + if (seq == tls_ctx->tx_seq_number) + return true; + else + return false; +} + struct sk_buff * tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From 30f8eb55873ef078f5f02f636061d9399debbeab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Fri, 21 Sep 2018 12:39:29 +0200 Subject: net: if_arp: Fix incorrect indents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing incorrect indents and align comments. Signed-off-by: Håkon Bugge Signed-off-by: David S. Miller --- include/uapi/linux/if_arp.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index 4605527ca41b..b68b4b3d9172 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -114,18 +114,18 @@ /* ARP ioctl request. */ struct arpreq { - struct sockaddr arp_pa; /* protocol address */ - struct sockaddr arp_ha; /* hardware address */ - int arp_flags; /* flags */ - struct sockaddr arp_netmask; /* netmask (only for proxy arps) */ - char arp_dev[16]; + struct sockaddr arp_pa; /* protocol address */ + struct sockaddr arp_ha; /* hardware address */ + int arp_flags; /* flags */ + struct sockaddr arp_netmask; /* netmask (only for proxy arps) */ + char arp_dev[16]; }; struct arpreq_old { - struct sockaddr arp_pa; /* protocol address */ - struct sockaddr arp_ha; /* hardware address */ - int arp_flags; /* flags */ - struct sockaddr arp_netmask; /* netmask (only for proxy arps) */ + struct sockaddr arp_pa; /* protocol address */ + struct sockaddr arp_ha; /* hardware address */ + int arp_flags; /* flags */ + struct sockaddr arp_netmask; /* netmask (only for proxy arps) */ }; /* ARP Flag values. */ -- cgit v1.2.3 From 6a12709da354ea149fdf86c4c9aba5b5033e9cf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Fri, 21 Sep 2018 12:39:30 +0200 Subject: net: if_arp: use define instead of hard-coded value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uapi/linux/if_arp.h includes linux/netdevice.h, which uses IFNAMSIZ. Hence, use it instead of hard-coded value. Signed-off-by: Håkon Bugge Signed-off-by: David S. Miller --- include/uapi/linux/if_arp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index b68b4b3d9172..c3cc5a9e5eaf 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -118,7 +118,7 @@ struct arpreq { struct sockaddr arp_ha; /* hardware address */ int arp_flags; /* flags */ struct sockaddr arp_netmask; /* netmask (only for proxy arps) */ - char arp_dev[16]; + char arp_dev[IFNAMSIZ]; }; struct arpreq_old { -- cgit v1.2.3 From 72b0094f918294e6cb8cf5c3b4520d928fbb1a57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 08:51:46 -0700 Subject: tcp: switch tcp_clock_ns() to CLOCK_TAI base TCP pacing is either implemented in sch_fq or internally. We have the goal of being able to offload pacing on the NICS. TCP will soon provide per skb skb->tstamp as early departure time. Like ETF in commit 25db26a91364 ("net/sched: Introduce the ETF Qdisc") we chose CLOCK_T as the clock base, so that TCP and pacers can share a common clock, to get better RTT samples (without pacing artificially inflating these samples). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 770917d0caa7..c6f0bc1dc678 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -732,7 +732,7 @@ void tcp_send_window_probe(struct sock *sk); static inline u64 tcp_clock_ns(void) { - return local_clock(); + return ktime_get_tai_ns(); } static inline u64 tcp_clock_us(void) -- cgit v1.2.3 From 2fd66ffba50716fc5ab481c48db643af3bda2276 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 08:51:47 -0700 Subject: tcp: introduce tcp_skb_timestamp_us() helper There are few places where TCP reads skb->skb_mstamp expecting a value in usec unit. skb->tstamp (aka skb->skb_mstamp) will soon store CLOCK_TAI nsec value. Add tcp_skb_timestamp_us() to provide proper conversion when needed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index c6f0bc1dc678..0ca5ea10dc06 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -774,6 +774,12 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ); } +/* provide the departure time in us unit */ +static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) +{ + return skb->skb_mstamp; +} + #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) @@ -1940,7 +1946,7 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk) { const struct sk_buff *skb = tcp_rtx_queue_head(sk); u32 rto = inet_csk(sk)->icsk_rto; - u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); + u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto); return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; } -- cgit v1.2.3 From 9799ccb0e984a5c1311b22a212e7ff96e8b736de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 08:51:49 -0700 Subject: tcp: add tcp_wstamp_ns socket field TCP will soon provide earliest departure time on TX skbs. It needs to track this in a new variable. tcp_mstamp_refresh() needs to update this variable, and became too big to stay an inline. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ include/net/tcp.h | 12 +----------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 263e37271afd..848f5b25e178 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -248,6 +248,8 @@ struct tcp_sock { syn_smc:1; /* SYN includes SMC */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ + u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + /* RTT measurement */ u64 tcp_mstamp; /* most recent packet received/sent */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0ca5ea10dc06..370198fdc65d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -752,17 +752,7 @@ static inline u32 tcp_time_stamp_raw(void) return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ); } - -/* Refresh 1us clock of a TCP socket, - * ensuring monotically increasing values. - */ -static inline void tcp_mstamp_refresh(struct tcp_sock *tp) -{ - u64 val = tcp_clock_us(); - - if (val > tp->tcp_mstamp) - tp->tcp_mstamp = val; -} +void tcp_mstamp_refresh(struct tcp_sock *tp); static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) { -- cgit v1.2.3 From d3edd06ea8ea9e03de6567fda80b8be57e21a537 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 08:51:50 -0700 Subject: tcp: provide earliest departure time in skb->tstamp Switch internal TCP skb->skb_mstamp to skb->skb_mstamp_ns, from usec units to nsec units. Do not clear skb->tstamp before entering IP stacks in TX, so that qdisc or devices can implement pacing based on the earliest departure time instead of socket sk->sk_pacing_rate Packets are fed with tcp_wstamp_ns, and following patch will update tcp_wstamp_ns when both TCP and sch_fq switch to the earliest departure time mechanism. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- include/net/tcp.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e3a53ca4a9b5..86f337e9a81d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -689,7 +689,7 @@ struct sk_buff { union { ktime_t tstamp; - u64 skb_mstamp; + u64 skb_mstamp_ns; /* earliest departure time */ }; /* * This is the control buffer. It is free to use for every diff --git a/include/net/tcp.h b/include/net/tcp.h index 370198fdc65d..ff15d8e0d525 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -761,13 +761,13 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { - return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ); + return div_u64(skb->skb_mstamp_ns, NSEC_PER_SEC / TCP_TS_HZ); } /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { - return skb->skb_mstamp; + return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); } @@ -813,7 +813,7 @@ struct tcp_skb_cb { #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ #define TCPCB_LOST 0x04 /* SKB is lost */ #define TCPCB_TAGBITS 0x07 /* All tag bits */ -#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */ +#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp_ns) */ #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \ TCPCB_REPAIRED) -- cgit v1.2.3 From 5e111210a44301304f9054e995bf33f69b6de76f Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 21 Sep 2018 07:13:54 -0400 Subject: net/core: Add new basic hardware counter Add a new hardware specific basic counter, TCA_STATS_BASIC_HW. This can be used to count packets/bytes processed by hardware offload. Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- include/net/gen_stats.h | 4 ++++ include/uapi/linux/gen_stats.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 883bb9085f15..946bd53a9f81 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -44,6 +44,10 @@ void __gnet_stats_copy_basic(const seqcount_t *running, struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); +int gnet_stats_copy_basic_hw(const seqcount_t *running, + struct gnet_dump *d, + struct gnet_stats_basic_cpu __percpu *cpu, + struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, struct net_rate_estimator __rcu **ptr); int gnet_stats_copy_queue(struct gnet_dump *d, diff --git a/include/uapi/linux/gen_stats.h b/include/uapi/linux/gen_stats.h index 24a861c0d29d..065408e16a80 100644 --- a/include/uapi/linux/gen_stats.h +++ b/include/uapi/linux/gen_stats.h @@ -12,6 +12,7 @@ enum { TCA_STATS_APP, TCA_STATS_RATE_EST64, TCA_STATS_PAD, + TCA_STATS_BASIC_HW, __TCA_STATS_MAX, }; #define TCA_STATS_MAX (__TCA_STATS_MAX - 1) -- cgit v1.2.3 From 28169abadb08333eb607621faa3a1dd7109e0d45 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 21 Sep 2018 07:14:02 -0400 Subject: net/sched: Add hardware specific counters to TC actions Add additional counters that will store the bytes/packets processed by hardware. These will be exported through the netlink interface for displaying by the iproute2 tc tool Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- include/net/act_api.h | 8 +++++--- include/net/pkt_cls.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index c6f195b3c706..1ddff3360592 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -31,10 +31,12 @@ struct tc_action { int tcfa_action; struct tcf_t tcfa_tm; struct gnet_stats_basic_packed tcfa_bstats; + struct gnet_stats_basic_packed tcfa_bstats_hw; struct gnet_stats_queue tcfa_qstats; struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie __rcu *act_cookie; struct tcf_chain *goto_chain; @@ -94,7 +96,7 @@ struct tc_action_ops { struct netlink_callback *, int, const struct tc_action_ops *, struct netlink_ext_ack *); - void (*stats_update)(struct tc_action *, u64, u32, u64); + void (*stats_update)(struct tc_action *, u64, u32, u64, bool); size_t (*get_fill_size)(const struct tc_action *act); struct net_device *(*get_dev)(const struct tc_action *a); void (*put_dev)(struct net_device *dev); @@ -182,13 +184,13 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, - u64 packets, u64 lastuse) + u64 packets, u64 lastuse, bool hw) { #ifdef CONFIG_NET_CLS_ACT if (!a->ops->stats_update) return; - a->ops->stats_update(a, bytes, packets, lastuse); + a->ops->stats_update(a, bytes, packets, lastuse, hw); #endif } diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 75a3f3fdb359..bbfe27f86d5f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -318,7 +318,7 @@ tcf_exts_stats_update(const struct tcf_exts *exts, for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - tcf_action_stats_update(a, bytes, packets, lastuse); + tcf_action_stats_update(a, bytes, packets, lastuse, true); } preempt_enable(); -- cgit v1.2.3 From fc6e8073f304010605f834cb2eb8c07c46461c9d Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sat, 22 Sep 2018 21:26:20 -0700 Subject: neighbour: send netlink notification if NTF_ROUTER changes send netlink notification if neigh_update results in NTF_ROUTER change and if NEIGH_UPDATE_F_ISROUTER is on. Also move the NTF_ROUTER change function into a helper. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/net/neighbour.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6c1eecd56a4d..0874f7fcd859 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -544,4 +544,19 @@ static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, *notify = 1; } } + +static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags, + int *notify) +{ + u8 ndm_flags = 0; + + ndm_flags |= (flags & NEIGH_UPDATE_F_ISROUTER) ? NTF_ROUTER : 0; + if ((neigh->flags ^ ndm_flags) & NTF_ROUTER) { + if (ndm_flags & NTF_ROUTER) + neigh->flags |= NTF_ROUTER; + else + neigh->flags &= ~NTF_ROUTER; + *notify = 1; + } +} #endif -- cgit v1.2.3 From 9932a29ab1be1427a2ccbdf852a0f131f2849685 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Mon, 24 Sep 2018 15:35:56 +0530 Subject: net/tls: Fixed race condition in async encryption On processors with multi-engine crypto accelerators, it is possible that multiple records get encrypted in parallel and their encryption completion is notified to different cpus in multicore processor. This leads to the situation where tls_encrypt_done() starts executing in parallel on different cores. In current implementation, encrypted records are queued to tx_ready_list in tls_encrypt_done(). This requires addition to linked list 'tx_ready_list' to be protected. As tls_decrypt_done() could be executing in irq content, it is not possible to protect linked list addition operation using a lock. To fix the problem, we remove linked list addition operation from the irq context. We do tx_ready_list addition/removal operation from application context only and get rid of possible multiple access to the linked list. Before starting encryption on the record, we add it to the tail of tx_ready_list. To prevent tls_tx_records() from transmitting it, we mark the record with a new flag 'tx_ready' in 'struct tls_rec'. When record encryption gets completed, tls_encrypt_done() has to only update the 'tx_ready' flag to true & linked list add operation is not required. The changed logic brings some other side benefits. Since the records are always submitted in tls sequence number order for encryption, the tx_ready_list always remains sorted and addition of new records to it does not have to traverse the linked list. Lastly, we renamed tx_ready_list in 'struct tls_sw_context_tx' to 'tx_list'. This is because now, the some of the records at the tail are not ready to transmit. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption") Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- include/net/tls.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 3aa73e2d8823..1615fb5ea114 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -99,6 +99,7 @@ enum { */ struct tls_rec { struct list_head list; + int tx_ready; int tx_flags; struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS]; struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS]; @@ -128,7 +129,7 @@ struct tls_sw_context_tx { struct crypto_wait async_wait; struct tx_work tx_work; struct tls_rec *open_rec; - struct list_head tx_ready_list; + struct list_head tx_list; atomic_t encrypt_pending; int async_notify; @@ -220,7 +221,6 @@ struct tls_context { struct scatterlist *partially_sent_record; u16 partially_sent_offset; - u64 tx_seq_number; /* Next TLS seqnum to be transmitted */ unsigned long flags; bool in_tcp_sendpages; @@ -341,21 +341,15 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) return tls_ctx->pending_open_record_frags; } -static inline bool is_tx_ready(struct tls_context *tls_ctx, - struct tls_sw_context_tx *ctx) +static inline bool is_tx_ready(struct tls_sw_context_tx *ctx) { struct tls_rec *rec; - u64 seq; - rec = list_first_entry(&ctx->tx_ready_list, struct tls_rec, list); + rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); if (!rec) return false; - seq = be64_to_cpup((const __be64 *)&rec->aad_space); - if (seq == tls_ctx->tx_seq_number) - return true; - else - return false; + return READ_ONCE(rec->tx_ready); } struct sk_buff * -- cgit v1.2.3 From 9ba481e2eb3b932ae5b6278342b256e4f92d2793 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:20 +0300 Subject: net/mlx5: Set uid as part of CQ commands Set uid as part of CQ commands so that the firmware can manage the CQ object in a secured way. The firmware should mark this CQ with the given uid so that it can be used later on only by objects with the same uid. Upon DEVX flows that use this CQ (e.g. create QP command), the pointed CQ must have the same uid as of the issuer uid command. When a command is issued with uid=0 it means that the issuer of the command is trusted (i.e. kernel), in that case any pointed object can be used regardless of its uid. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/cq.h | 1 + include/linux/mlx5/mlx5_ifc.h | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 0ef6138eca49..31a750570c38 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -61,6 +61,7 @@ struct mlx5_core_cq { int reset_notify_added; struct list_head reset_notify; struct mlx5_eq *eq; + u16 uid; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4c7a1d25d73b..1a412b355054 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5629,7 +5629,7 @@ enum { struct mlx5_ifc_modify_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6404,7 +6404,7 @@ struct mlx5_ifc_destroy_cq_out_bits { struct mlx5_ifc_destroy_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7164,7 +7164,7 @@ struct mlx5_ifc_create_cq_out_bits { struct mlx5_ifc_create_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From 4ac63ec72587f7426aae15ddfe78e8ab785724dc Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:21 +0300 Subject: net/mlx5: Set uid as part of QP commands Set uid as part of QP commands so that the firmware can manage the QP object in a secured way. That will enable using a QP that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++----------- include/linux/mlx5/qp.h | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1a412b355054..3b6a612787ac 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3394,7 +3394,7 @@ struct mlx5_ifc_sqerr2rts_qp_out_bits { struct mlx5_ifc_sqerr2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3424,7 +3424,7 @@ struct mlx5_ifc_sqd2rts_qp_out_bits { struct mlx5_ifc_sqd2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3629,7 +3629,7 @@ struct mlx5_ifc_rts2rts_qp_out_bits { struct mlx5_ifc_rts2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3659,7 +3659,7 @@ struct mlx5_ifc_rtr2rts_qp_out_bits { struct mlx5_ifc_rtr2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3689,7 +3689,7 @@ struct mlx5_ifc_rst2init_qp_out_bits { struct mlx5_ifc_rst2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5192,7 +5192,7 @@ struct mlx5_ifc_qp_2rst_out_bits { struct mlx5_ifc_qp_2rst_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5214,7 +5214,7 @@ struct mlx5_ifc_qp_2err_out_bits { struct mlx5_ifc_qp_2err_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5789,7 +5789,7 @@ struct mlx5_ifc_init2rtr_qp_out_bits { struct mlx5_ifc_init2rtr_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5819,7 +5819,7 @@ struct mlx5_ifc_init2init_qp_out_bits { struct mlx5_ifc_init2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6230,7 +6230,7 @@ struct mlx5_ifc_destroy_qp_out_bits { struct mlx5_ifc_destroy_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6895,7 +6895,7 @@ struct mlx5_ifc_create_qp_out_bits { struct mlx5_ifc_create_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4778d41085d4..fbe322c966bc 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -471,6 +471,7 @@ struct mlx5_core_qp { int qpn; struct mlx5_rsc_debug *dbg; int pid; + u16 uid; }; struct mlx5_core_dct { -- cgit v1.2.3 From d269b3afffcb107375d9cf73127fc2e0181bc90b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:22 +0300 Subject: net/mlx5: Set uid as part of RQ commands Set uid as part of RQ commands so that the firmware can manage the RQ object in a secured way. That will enable using an RQ that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3b6a612787ac..689631ca27b8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5488,7 +5488,7 @@ enum { struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6164,7 +6164,7 @@ struct mlx5_ifc_destroy_rq_out_bits { struct mlx5_ifc_destroy_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6847,7 +6847,7 @@ struct mlx5_ifc_create_rq_out_bits { struct mlx5_ifc_create_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From 430ae0d5a3ce1350375690cb6ab29ab6fae4a9ac Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:23 +0300 Subject: net/mlx5: Set uid as part of SQ commands Set uid as part of SQ commands so that the firmware can manage the SQ object in a secured way. That will enable using an SQ that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 689631ca27b8..72dd1e49a799 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5381,7 +5381,7 @@ struct mlx5_ifc_modify_sq_out_bits { struct mlx5_ifc_modify_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6096,7 +6096,7 @@ struct mlx5_ifc_destroy_sq_out_bits { struct mlx5_ifc_destroy_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6769,7 +6769,7 @@ struct mlx5_ifc_create_sq_out_bits { struct mlx5_ifc_create_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From a0d8c054318976927493ffd26055d9d183c9beec Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:24 +0300 Subject: net/mlx5: Set uid as part of SRQ commands Set uid as part of SRQ commands so that the firmware can manage the SRQ object in a secured way. That will enable using an SRQ that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++----------- include/linux/mlx5/srq.h | 1 + 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d885e9f0e054..8fb072aa8671 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -474,6 +474,7 @@ struct mlx5_core_srq { atomic_t refcount; struct completion free; + u16 uid; }; struct mlx5_eq_table { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 72dd1e49a799..85f1237d80db 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5524,7 +5524,7 @@ struct mlx5_ifc_rmp_bitmask_bits { struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5986,7 +5986,7 @@ struct mlx5_ifc_destroy_xrq_out_bits { struct mlx5_ifc_destroy_xrq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6008,7 +6008,7 @@ struct mlx5_ifc_destroy_xrc_srq_out_bits { struct mlx5_ifc_destroy_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6074,7 +6074,7 @@ struct mlx5_ifc_destroy_srq_out_bits { struct mlx5_ifc_destroy_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6208,7 +6208,7 @@ struct mlx5_ifc_destroy_rmp_out_bits { struct mlx5_ifc_destroy_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6641,7 +6641,7 @@ struct mlx5_ifc_create_xrq_out_bits { struct mlx5_ifc_create_xrq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6665,7 +6665,7 @@ struct mlx5_ifc_create_xrc_srq_out_bits { struct mlx5_ifc_create_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6741,7 +6741,7 @@ struct mlx5_ifc_create_srq_out_bits { struct mlx5_ifc_create_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6871,7 +6871,7 @@ struct mlx5_ifc_create_rmp_out_bits { struct mlx5_ifc_create_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7272,7 +7272,7 @@ enum { struct mlx5_ifc_arm_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7300,7 +7300,7 @@ enum { struct mlx5_ifc_arm_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 24ff23e27c8a..1b1f3c20c6a3 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -61,6 +61,7 @@ struct mlx5_srq_attr { u32 tm_next_tag; u32 tm_hw_phase_cnt; u32 tm_sw_phase_cnt; + u16 uid; }; struct mlx5_core_dev; -- cgit v1.2.3 From 774ea6eea29025218f75bc94c764df9a641db471 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:25 +0300 Subject: net/mlx5: Set uid as part of DCT commands Set uid as part of DCT commands so that the firmware can manage the DCT object in a secured way. That will enable using a DCT that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 85f1237d80db..62c0592a9fdb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5918,7 +5918,7 @@ struct mlx5_ifc_drain_dct_out_bits { struct mlx5_ifc_drain_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6382,7 +6382,7 @@ struct mlx5_ifc_destroy_dct_out_bits { struct mlx5_ifc_destroy_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7138,7 +7138,7 @@ struct mlx5_ifc_create_dct_out_bits { struct mlx5_ifc_create_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From bd37197554eb28a7fc38e44e005e303c77f788ed Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 20 Sep 2018 21:35:26 +0300 Subject: net/mlx5: Update mlx5_ifc with DEVX UID bits Add DEVX information to WQ, SRQ, CQ, TIR, TIS, QP, RQ, XRCD, PD, MKEY and MCG. Each object that is created/destroyed/modified via verbs will be stamped with a UID based on its user context. This is already done for DEVX objects commands. This will enable the firmware to enforce the usage of kernel objects from the DEVX flow by validating that the same UID is used and the resources are really related to the same user. The addition of *_valid fields are needed to distinguish how various addresses are passed. For non-DEVX callers, all those fields will be zero. Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 67 +++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 62c0592a9fdb..68f4d5f9d929 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1290,7 +1290,9 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x3]; + u8 dbr_umem_valid[0x1]; + u8 wq_umem_valid[0x1]; + u8 reserved_at_122[0x1]; u8 log_hairpin_num_packets[0x5]; u8 reserved_at_128[0x3]; u8 log_hairpin_data_sz[0x5]; @@ -2364,7 +2366,10 @@ struct mlx5_ifc_qpc_bits { u8 dc_access_key[0x40]; - u8 reserved_at_680[0xc0]; + u8 reserved_at_680[0x3]; + u8 dbr_umem_valid[0x1]; + + u8 reserved_at_684[0xbc]; }; struct mlx5_ifc_roce_addr_layout_bits { @@ -2464,7 +2469,7 @@ struct mlx5_ifc_xrc_srqc_bits { u8 wq_signature[0x1]; u8 cont_srq[0x1]; - u8 reserved_at_22[0x1]; + u8 dbr_umem_valid[0x1]; u8 rlky[0x1]; u8 basic_cyclic_rcv_wqe[0x1]; u8 log_rq_stride[0x3]; @@ -3128,7 +3133,9 @@ enum { struct mlx5_ifc_cqc_bits { u8 status[0x4]; - u8 reserved_at_4[0x4]; + u8 reserved_at_4[0x2]; + u8 dbr_umem_valid[0x1]; + u8 reserved_at_7[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; @@ -5314,7 +5321,7 @@ struct mlx5_ifc_modify_tis_bitmask_bits { struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5353,7 +5360,7 @@ struct mlx5_ifc_modify_tir_out_bits { struct mlx5_ifc_modify_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5454,7 +5461,7 @@ struct mlx5_ifc_rqt_bitmask_bits { struct mlx5_ifc_modify_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5641,7 +5648,10 @@ struct mlx5_ifc_modify_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x40]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2c1[0x5bf]; u8 pas[0][0x40]; }; @@ -5962,7 +5972,7 @@ struct mlx5_ifc_detach_from_mcg_out_bits { struct mlx5_ifc_detach_from_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6030,7 +6040,7 @@ struct mlx5_ifc_destroy_tis_out_bits { struct mlx5_ifc_destroy_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6052,7 +6062,7 @@ struct mlx5_ifc_destroy_tir_out_bits { struct mlx5_ifc_destroy_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6142,7 +6152,7 @@ struct mlx5_ifc_destroy_rqt_out_bits { struct mlx5_ifc_destroy_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6507,7 +6517,7 @@ struct mlx5_ifc_dealloc_xrcd_out_bits { struct mlx5_ifc_dealloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6595,7 +6605,7 @@ struct mlx5_ifc_dealloc_pd_out_bits { struct mlx5_ifc_dealloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6674,7 +6684,9 @@ struct mlx5_ifc_create_xrc_srq_in_bits { struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x40]; + u8 xrc_srq_umem_valid[0x1]; + u8 reserved_at_2c1[0x5bf]; u8 pas[0][0x40]; }; @@ -6693,7 +6705,7 @@ struct mlx5_ifc_create_tis_out_bits { struct mlx5_ifc_create_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6717,7 +6729,7 @@ struct mlx5_ifc_create_tir_out_bits { struct mlx5_ifc_create_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6823,7 +6835,7 @@ struct mlx5_ifc_create_rqt_out_bits { struct mlx5_ifc_create_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6908,7 +6920,10 @@ struct mlx5_ifc_create_qp_in_bits { struct mlx5_ifc_qpc_bits qpc; - u8 reserved_at_800[0x80]; + u8 reserved_at_800[0x60]; + + u8 wq_umem_valid[0x1]; + u8 reserved_at_861[0x1f]; u8 pas[0][0x40]; }; @@ -6970,7 +6985,8 @@ struct mlx5_ifc_create_mkey_in_bits { u8 reserved_at_40[0x20]; u8 pg_access[0x1]; - u8 reserved_at_61[0x1f]; + u8 mkey_umem_valid[0x1]; + u8 reserved_at_62[0x1e]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; @@ -7173,7 +7189,10 @@ struct mlx5_ifc_create_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x60]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2e1[0x59f]; u8 pas[0][0x40]; }; @@ -7221,7 +7240,7 @@ struct mlx5_ifc_attach_to_mcg_out_bits { struct mlx5_ifc_attach_to_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7348,7 +7367,7 @@ struct mlx5_ifc_alloc_xrcd_out_bits { struct mlx5_ifc_alloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7436,7 +7455,7 @@ struct mlx5_ifc_alloc_pd_out_bits { struct mlx5_ifc_alloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From f5bd91388e26557f64ca999e0006038c7a919308 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 Sep 2018 10:18:46 +0200 Subject: net: xsk: add a simple buffer reuse queue XSK UMEM is strongly single producer single consumer so reuse of frames is challenging. Add a simple "stash" of FILL packets to reuse for drivers to optionally make use of. This is useful when driver has to free (ndo_stop) or resize a ring with an active AF_XDP ZC socket. Signed-off-by: Jakub Kicinski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/net/xdp_sock.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 932ca0dad6f3..70a115bea4f4 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -21,6 +21,12 @@ struct xdp_umem_page { dma_addr_t dma; }; +struct xdp_umem_fq_reuse { + u32 nentries; + u32 length; + u64 handles[]; +}; + struct xdp_umem { struct xsk_queue *fq; struct xsk_queue *cq; @@ -37,6 +43,7 @@ struct xdp_umem { struct page **pgs; u32 npgs; struct net_device *dev; + struct xdp_umem_fq_reuse *fq_reuse; u16 queue_id; bool zc; spinlock_t xsk_list_lock; @@ -75,6 +82,10 @@ void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); void xsk_umem_consume_tx_done(struct xdp_umem *umem); +struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries); +struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, + struct xdp_umem_fq_reuse *newq); +void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq); static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { @@ -85,6 +96,35 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) { return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1)); } + +/* Reuse-queue aware version of FILL queue helpers */ +static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + if (!rq->length) + return xsk_umem_peek_addr(umem, addr); + + *addr = rq->handles[rq->length - 1]; + return addr; +} + +static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + if (!rq->length) + xsk_umem_discard_addr(umem); + else + rq->length--; +} + +static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + rq->handles[rq->length++] = addr; +} #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { @@ -128,6 +168,21 @@ static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem) { } +static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries) +{ + return NULL; +} + +static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap( + struct xdp_umem *umem, + struct xdp_umem_fq_reuse *newq) +{ + return NULL; +} +static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq) +{ +} + static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { return NULL; @@ -137,6 +192,20 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) { return 0; } + +static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) +{ + return NULL; +} + +static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem) +{ +} + +static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) +{ +} + #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ -- cgit v1.2.3 From 6f99528e9797794b91b43321fbbc93fe772b0803 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 24 Sep 2018 19:22:49 +0300 Subject: net: core: netlink: add helper refcount dec and lock function Rtnl lock is encapsulated in netlink and cannot be accessed by other modules directly. This means that reference counted objects that rely on rtnl lock cannot use it with refcounter helper function that atomically releases decrements reference and obtains mutex. This patch implements simple wrapper function around refcount_dec_and_lock that obtains rtnl lock if reference counter value reached 0. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5225832bd6ff..9cdd76348d9a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,6 +6,7 @@ #include #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -34,6 +35,7 @@ extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); +extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; -- cgit v1.2.3 From 86bd446b5cebd783187ea3772ff258210de77d99 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 24 Sep 2018 19:22:50 +0300 Subject: net: sched: rename qdisc_destroy() to qdisc_put() Current implementation of qdisc_destroy() decrements Qdisc reference counter and only actually destroy Qdisc if reference counter value reached zero. Rename qdisc_destroy() to qdisc_put() in order for it to better describe the way in which this function currently implemented and used. Extract code that deallocates Qdisc into new private qdisc_destroy() function. It is intended to be shared between regular qdisc_put() and its unlocked version that is introduced in next patch in this series. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d326fd553b58..fadb1a4d4ee8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -554,7 +554,7 @@ void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); -void qdisc_destroy(struct Qdisc *qdisc); +void qdisc_put(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, -- cgit v1.2.3 From 3a7d0d07a386716b459b00783b11a8211cefcc0f Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 24 Sep 2018 19:22:51 +0300 Subject: net: sched: extend Qdisc with rcu Currently, Qdisc API functions assume that users have rtnl lock taken. To implement rtnl unlocked classifiers update interface, Qdisc API must be extended with functions that do not require rtnl lock. Extend Qdisc structure with rcu. Implement special version of put function qdisc_put_unlocked() that is called without rtnl lock taken. This function only takes rtnl lock if Qdisc reference counter reached zero and is intended to be used as optimization. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 5 +++++ include/net/pkt_sched.h | 1 + include/net/sch_generic.h | 2 ++ 3 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9cdd76348d9a..bb9cb84114c1 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -85,6 +85,11 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); } +static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) +{ + return rcu_dereference(dev->ingress_queue); +} + struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7dc769e5452b..a16fbe9a2a67 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -102,6 +102,7 @@ int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index fadb1a4d4ee8..091b40c198ff 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -105,6 +105,7 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + struct rcu_head rcu; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) @@ -555,6 +556,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); +void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, -- cgit v1.2.3 From 9d7e82cec35c027756ec97e274f878251f271181 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 24 Sep 2018 19:22:52 +0300 Subject: net: sched: add helper function to take reference to Qdisc Implement function to take reference to Qdisc that relies on rcu read lock instead of rtnl mutex. Function only takes reference to Qdisc if reference counter isn't zero. Intended to be used by unlocked cls API. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 091b40c198ff..43b17f82d8ee 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -115,6 +115,19 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) refcount_inc(&qdisc->refcnt); } +/* Intended to be used by unlocked users, when concurrent qdisc release is + * possible. + */ + +static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return qdisc; + if (refcount_inc_not_zero(&qdisc->refcnt)) + return qdisc; + return NULL; +} + static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) -- cgit v1.2.3 From cfebd7e242d7193a9901222b3e667788810d98c1 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 24 Sep 2018 19:22:54 +0300 Subject: net: sched: change tcf block reference counter type to refcount_t As a preparation for removing rtnl lock dependency from rules update path, change tcf block reference counter type to refcount_t to allow modification by concurrent users. In block put function perform decrement and check reference counter once to accommodate concurrent modification by unlocked users. After this change tcf_chain_put at the end of block put function is called with block->refcnt==0 and will deallocate block after the last chain is released, so there is no need to manually deallocate block in this case. However, if block reference counter reached 0 and there are no chains to release, block must still be deallocated manually. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 43b17f82d8ee..4a86f4d33f07 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -345,7 +345,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; u32 index; /* block index for shared blocks */ - unsigned int refcnt; + refcount_t refcnt; struct net *net; struct Qdisc *q; struct list_head cb_list; -- cgit v1.2.3 From 0607e439943bd150e53eed2979f9c69aa61c37ce Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 24 Sep 2018 19:22:57 +0300 Subject: net: sched: implement tcf_block_refcnt_{get|put}() Implement get/put function for blocks that only take/release the reference and perform deallocation. These functions are intended to be used by unlocked rules update path to always hold reference to block while working with it. They use on new fine-grained locking mechanisms introduced in previous patches in this set, instead of relying on global protection provided by rtnl lock. Extract code that is common with tcf_block_detach_ext() into common function __tcf_block_put(). Extend tcf_block with rcu to allow safe deallocation when it is accessed concurrently. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4a86f4d33f07..7a6b71ee5433 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -357,6 +357,7 @@ struct tcf_block { struct tcf_chain *chain; struct list_head filter_chain_list; } chain0; + struct rcu_head rcu; }; static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) -- cgit v1.2.3 From cd11d11286cba88aab5b1da1c83ee36e5b5cefb7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 26 Sep 2018 18:29:06 +0200 Subject: net/af_iucv: locate IUCV header via skb_network_header() This patch attempts to untangle the TX and RX code in qeth from af_iucv's respective HiperTransport path: On the TX side, pointing skb_network_header() at the IUCV header means that qeth_l3_fill_af_iucv_hdr() no longer needs a magical offset to access the header. On the RX side, qeth pulls the (fake) L2 header off the skb like any normal ethernet driver would. This makes working with the IUCV header in af_iucv easier, since we no longer have to assume a fixed skb layout. While at it, replace the open-coded length checks in af_iucv's RX path with pskb_may_pull(). Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- include/net/iucv/af_iucv.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index f4c21b5a1242..14a490246be9 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -80,6 +80,11 @@ struct af_iucv_trans_hdr { u8 pad; /* total 104 bytes */ } __packed; +static inline struct af_iucv_trans_hdr *iucv_trans_hdr(struct sk_buff *skb) +{ + return (struct af_iucv_trans_hdr *)skb_network_header(skb); +} + enum iucv_tx_notify { /* transmission of skb is completed and was successful */ TX_NOTIFY_OK = 0, -- cgit v1.2.3 From af4325ecc24f45933d5567e72227cff2c1594764 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Mon, 24 Sep 2018 20:57:29 +0800 Subject: tcp: expose sk_state in tcp_retransmit_skb tracepoint After sk_state exposed, we can get in which state this retransmission occurs. That could give us more detail for dignostic. For example, if this retransmission occurs in SYN_SENT state, it may also indicates that the syn packet may be dropped on the remote peer due to syn backlog queue full and then we could check the remote peer. BTW,SYNACK retransmission is traced in tcp_retransmit_synack tracepoint. Signed-off-by: Yafang Shao Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index ac55b328d61b..2bc9960a31aa 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -56,6 +56,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_STRUCT__entry( __field(const void *, skbaddr) __field(const void *, skaddr) + __field(int, state) __field(__u16, sport) __field(__u16, dport) __array(__u8, saddr, 4) @@ -70,6 +71,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, __entry->skbaddr = skb; __entry->skaddr = sk; + __entry->state = sk->sk_state; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); @@ -84,9 +86,10 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s\n", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, - __entry->saddr_v6, __entry->daddr_v6) + __entry->saddr_v6, __entry->daddr_v6, + show_tcp_state_name(__entry->state)) ); DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb, -- cgit v1.2.3 From d888f39666774c7debfa34e4e20ba33cf61a6d71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 25 Sep 2018 20:56:26 -0700 Subject: net-ipv4: remove 2 always zero parameters from ipv4_update_pmtu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (the parameters in question are mark and flow_flags) Reviewed-by: David Ahern Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/net/route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index bb53cdba38dc..73c605bdd6d8 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -201,7 +201,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, - u32 mark, u8 protocol, int flow_flags); + u8 protocol); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, u8 protocol, int flow_flags); -- cgit v1.2.3 From 1042caa79e9351b81ed19dc8d2d7fd6ff51a4422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 25 Sep 2018 20:56:27 -0700 Subject: net-ipv4: remove 2 always zero parameters from ipv4_redirect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (the parameters in question are mark and flow_flags) Reviewed-by: David Ahern Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/net/route.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 73c605bdd6d8..9883dc82f723 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -203,8 +203,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u8 protocol); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); -void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, - u8 protocol, int flow_flags); +void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); void ip_rt_send_redirect(struct sk_buff *skb); -- cgit v1.2.3 From b950aa88638c52a013504f025e0b8f99bf2dc26e Mon Sep 17 00:00:00 2001 From: Ankit Navik Date: Fri, 17 Aug 2018 07:29:19 +0530 Subject: Bluetooth: Add definitions and track LE resolve list modification Add the definitions for adding entries to the LE resolve list and removing entries from the LE resolve list. When the LE resolve list gets changed via HCI commands make sure that the internal storage of the resolve list entries gets updated. Signed-off-by: Ankit Navik Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 14 ++++++++++++++ include/net/bluetooth/hci_core.h | 15 +++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cdd9f1fe7cfa..c36dc1e20556 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1517,6 +1517,20 @@ struct hci_cp_le_write_def_data_len { __le16 tx_time; } __packed; +#define HCI_OP_LE_ADD_TO_RESOLV_LIST 0x2027 +struct hci_cp_le_add_to_resolv_list { + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 peer_irk[16]; + __u8 local_irk[16]; +} __packed; + +#define HCI_OP_LE_DEL_FROM_RESOLV_LIST 0x2028 +struct hci_cp_le_del_from_resolv_list { + __u8 bdaddr_type; + bdaddr_t bdaddr; +} __packed; + #define HCI_OP_LE_CLEAR_RESOLV_LIST 0x2029 #define HCI_OP_LE_READ_RESOLV_LIST_SIZE 0x202a diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0db1b9b428b7..9b0f821b2d3a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -103,6 +103,14 @@ struct bdaddr_list { u8 bdaddr_type; }; +struct bdaddr_list_with_irk { + struct list_head list; + bdaddr_t bdaddr; + u8 bdaddr_type; + u8 peer_irk[16]; + u8 local_irk[16]; +}; + struct bt_uuid { struct list_head list; u8 uuid[16]; @@ -1058,8 +1066,15 @@ int hci_inquiry(void __user *arg); struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *list, bdaddr_t *bdaddr, u8 type); +struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk( + struct list_head *list, bdaddr_t *bdaddr, + u8 type); int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type); +int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr, + u8 type, u8 *peer_irk, u8 *local_irk); int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type); +int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr, + u8 type); void hci_bdaddr_list_clear(struct list_head *list); struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, -- cgit v1.2.3 From fe1493101ac1313cbdbef1af65342fb17d944e71 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 4 Sep 2018 13:39:20 +0300 Subject: Bluetooth: L2CAP: Derive MPS from connection MTU This ensures the MPS can fit in a single HCI fragment so each segment don't have to be reassembled at HCI level, in addition to that also remove the debugfs entry to configure the MPS. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 0697fd413087..17296675a0b1 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -456,7 +456,6 @@ struct l2cap_conn_param_update_rsp { #define L2CAP_CONN_PARAM_REJECTED 0x0001 #define L2CAP_LE_MAX_CREDITS 10 -#define L2CAP_LE_DEFAULT_MPS 230 struct l2cap_le_conn_req { __le16 psm; -- cgit v1.2.3 From 96cd8eaa131f0ffd4cfae09e1b4bdfafb9570907 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 4 Sep 2018 13:39:21 +0300 Subject: Bluetooth: L2CAP: Derive rx credits from MTU and MPS Give enough rx credits for a full packet instead of using an arbitrary number which may not be enough depending on the MTU and MPS which can cause interruptions while waiting for more credits, also remove debugfs entry for l2cap_le_max_credits. With these changes the credits are restored after each SDU is received instead of using fixed threshold, this way it is garanteed that there will always be enough credits to send a packet without waiting more credits to arrive. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 17296675a0b1..3555440e14fc 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -455,8 +455,6 @@ struct l2cap_conn_param_update_rsp { #define L2CAP_CONN_PARAM_ACCEPTED 0x0000 #define L2CAP_CONN_PARAM_REJECTED 0x0001 -#define L2CAP_LE_MAX_CREDITS 10 - struct l2cap_le_conn_req { __le16 psm; __le16 scid; -- cgit v1.2.3 From fb961945457f5177072c968aa38fee910ab893b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Sun, 23 Sep 2018 20:26:15 +0200 Subject: netfilter: nf_tables: add SECMARK support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the ability to set the security context of packets within the nf_tables framework. Add a nft_object for holding security contexts in the kernel and manipulating packets on the wire. Convert the security context strings at rule addition time to security identifiers. This is the same behavior like in xt_SECMARK and offers better performance than computing it per packet. Set the maximum security context length to 256. Signed-off-by: Christian Göttsche Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 4 ++++ include/uapi/linux/netfilter/nf_tables.h | 18 +++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8da837d2aaf9..2046d104f323 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -16,6 +16,10 @@ extern struct nft_expr_type nft_meta_type; extern struct nft_expr_type nft_rt_type; extern struct nft_expr_type nft_exthdr_type; +#ifdef CONFIG_NETWORK_SECMARK +extern struct nft_object_type nft_secmark_obj_type; +#endif + int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 702e4f0bec56..5444e76870bb 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1176,6 +1176,21 @@ enum nft_quota_attributes { }; #define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) +/** + * enum nft_secmark_attributes - nf_tables secmark object netlink attributes + * + * @NFTA_SECMARK_CTX: security context (NLA_STRING) + */ +enum nft_secmark_attributes { + NFTA_SECMARK_UNSPEC, + NFTA_SECMARK_CTX, + __NFTA_SECMARK_MAX, +}; +#define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1) + +/* Max security context length */ +#define NFT_SECMARK_CTX_MAXLEN 256 + /** * enum nft_reject_types - nf_tables reject expression reject types * @@ -1432,7 +1447,8 @@ enum nft_ct_timeout_timeout_attributes { #define NFT_OBJECT_CONNLIMIT 5 #define NFT_OBJECT_TUNNEL 6 #define NFT_OBJECT_CT_TIMEOUT 7 -#define __NFT_OBJECT_MAX 8 +#define NFT_OBJECT_SECMARK 8 +#define __NFT_OBJECT_MAX 9 #define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** -- cgit v1.2.3 From fe3b30ddb90face841b2ede3b73ed2e9cfece6ba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Sep 2018 11:15:30 +0200 Subject: netlink: remove NLA_NESTED_COMPAT This isn't used anywhere, so we might as well get rid of it. Reviewed-by: David Ahern Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 318b1ded3833..b680fe365e91 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -172,7 +172,6 @@ enum { NLA_FLAG, NLA_MSECS, NLA_NESTED, - NLA_NESTED_COMPAT, NLA_NUL_STRING, NLA_BINARY, NLA_S8, @@ -203,7 +202,6 @@ enum { * NLA_BINARY Maximum length of attribute payload * NLA_NESTED Don't use `len' field -- length verification is * done by checking len of nested header (or empty) - * NLA_NESTED_COMPAT Minimum length of structure payload * NLA_U8, NLA_U16, * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, -- cgit v1.2.3 From 48fde90a78f8c67e2bec5061f9725fe363519feb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Sep 2018 11:15:31 +0200 Subject: netlink: make validation_data const The validation data is only used within the policy that should usually already be const, and isn't changed in any code that uses it. Therefore, make the validation_data pointer const. While at it, remove the duplicate variable in the bitfield validation that I'd otherwise have to change to const. Reviewed-by: David Ahern Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index b680fe365e91..0d698215d4d9 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -237,7 +237,7 @@ enum { struct nla_policy { u16 type; u16 len; - void *validation_data; + const void *validation_data; }; #define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len } -- cgit v1.2.3 From 9a659a35ba177cec30676e170fb6ed98157bcb0d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Sep 2018 11:15:33 +0200 Subject: netlink: allow NLA_NESTED to specify nested policy to validate Now that we have a validation_data pointer, and the len field in the policy is unused for NLA_NESTED, we can allow using them both to have nested validation. This can be nice in code, although we still have to use nla_parse_nested() or similar which would also take a policy; however, it also serves as documentation in the policy without requiring a look at the code. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 0d698215d4d9..91907852da1c 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -200,8 +200,10 @@ enum { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload - * NLA_NESTED Don't use `len' field -- length verification is - * done by checking len of nested header (or empty) + * NLA_NESTED Length verification is done by checking len of + * nested header (or empty); len field is used if + * validation_data is also used, for the max attr + * number in the nested policy. * NLA_U8, NLA_U16, * NLA_U32, NLA_U64, * NLA_S8, NLA_S16, @@ -224,6 +226,10 @@ enum { * NLA_REJECT This attribute is always rejected and validation data * may point to a string to report as the error instead * of the generic one in extended ACK. + * NLA_NESTED Points to a nested policy to validate, must also set + * `len' to the max attribute number. + * Note that nla_parse() will validate, but of course not + * parse, the nested sub-policies. * All other Unused * * Example: @@ -247,6 +253,9 @@ struct nla_policy { #define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) #define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) +#define NLA_POLICY_NESTED(maxattr, policy) \ + { .type = NLA_NESTED, .validation_data = policy, .len = maxattr } + /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request -- cgit v1.2.3 From 1501d13596b92d6d1f0ea5e104be838188b6e026 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Sep 2018 11:15:34 +0200 Subject: netlink: add nested array policy validation Sometimes nested netlink attributes are just used as arrays, with the nla_type() of each not being used; we have this in nl80211 and e.g. NFTA_SET_ELEM_LIST_ELEMENTS. Add the ability to validate this type of message directly in the policy, by adding the type NLA_NESTED_ARRAY which does exactly this: require a first level of nesting but ignore the attribute type, and then inside each require a second level of nested and validate those attributes against a given policy (if present). Note that some nested array types actually require that all of the entries have the same index, this is possible to express in a nested policy already, apart from the validation that only the one allowed type is used. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 91907852da1c..3698ca8ff92c 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -172,6 +172,7 @@ enum { NLA_FLAG, NLA_MSECS, NLA_NESTED, + NLA_NESTED_ARRAY, NLA_NUL_STRING, NLA_BINARY, NLA_S8, @@ -200,7 +201,8 @@ enum { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload - * NLA_NESTED Length verification is done by checking len of + * NLA_NESTED, + * NLA_NESTED_ARRAY Length verification is done by checking len of * nested header (or empty); len field is used if * validation_data is also used, for the max attr * number in the nested policy. @@ -230,6 +232,12 @@ enum { * `len' to the max attribute number. * Note that nla_parse() will validate, but of course not * parse, the nested sub-policies. + * NLA_NESTED_ARRAY Points to a nested policy to validate, must also set + * `len' to the max attribute number. The difference to + * NLA_NESTED is the structure - NLA_NESTED has the + * nested attributes directly inside, while an array has + * the nested attributes at another level down and the + * attributes directly in the nesting don't matter. * All other Unused * * Example: @@ -255,6 +263,8 @@ struct nla_policy { #define NLA_POLICY_NESTED(maxattr, policy) \ { .type = NLA_NESTED, .validation_data = policy, .len = maxattr } +#define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \ + { .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr } /** * struct nl_info - netlink source information -- cgit v1.2.3 From 30d65e0804d58a03d1a8ea4e12c6fc07ed08218b Mon Sep 17 00:00:00 2001 From: Matias Karhumaa Date: Fri, 28 Sep 2018 21:54:30 +0300 Subject: Bluetooth: Fix debugfs NULL pointer dereference Fix crash caused by NULL pointer dereference when debugfs functions le_max_key_read, le_max_key_size_write, le_min_key_size_read or le_min_key_size_write and Bluetooth adapter was powered off. Fix is to move max_key_size and min_key_size from smp_dev to hci_dev. At the same time they were renamed to le_max_key_size and le_min_key_size. BUG: unable to handle kernel NULL pointer dereference at 00000000000002e8 PGD 0 P4D 0 Oops: 0000 [#24] SMP PTI CPU: 2 PID: 6255 Comm: cat Tainted: G D OE 4.18.9-200.fc28.x86_64 #1 Hardware name: LENOVO 4286CTO/4286CTO, BIOS 8DET76WW (1.46 ) 06/21/2018 RIP: 0010:le_max_key_size_read+0x45/0xb0 [bluetooth] Code: 00 00 00 48 83 ec 10 65 48 8b 04 25 28 00 00 00 48 89 44 24 08 31 c0 48 8b 87 c8 00 00 00 48 8d 7c 24 04 48 8b 80 48 0a 00 00 <48> 8b 80 e8 02 00 00 0f b6 48 52 e8 fb b6 b3 ed be 04 00 00 00 48 RSP: 0018:ffffab23c3ff3df0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00007f0b4ca2e000 RCX: ffffab23c3ff3f08 RDX: ffffffffc0ddb033 RSI: 0000000000000004 RDI: ffffab23c3ff3df4 RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000000 R10: ffffab23c3ff3ed8 R11: 0000000000000000 R12: ffffab23c3ff3f08 R13: 00007f0b4ca2e000 R14: 0000000000020000 R15: ffffab23c3ff3f08 FS: 00007f0b4ca0f540(0000) GS:ffff91bd5e280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000002e8 CR3: 00000000629fa006 CR4: 00000000000606e0 Call Trace: full_proxy_read+0x53/0x80 __vfs_read+0x36/0x180 vfs_read+0x8a/0x140 ksys_read+0x4f/0xb0 do_syscall_64+0x5b/0x160 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Matias Karhumaa Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9b0f821b2d3a..e5ea633ea368 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -267,6 +267,8 @@ struct hci_dev { __u16 le_max_tx_time; __u16 le_max_rx_len; __u16 le_max_rx_time; + __u8 le_max_key_size; + __u8 le_min_key_size; __u16 discov_interleaved_timeout; __u16 conn_info_min_age; __u16 conn_info_max_age; -- cgit v1.2.3 From 80ece6a03aaf3f3215475826bdd2bb9f326bccfd Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Wed, 26 Sep 2018 16:22:08 +0530 Subject: tls: Remove redundant vars from tls record structure Structure 'tls_rec' contains sg_aead_in and sg_aead_out which point to a aad_space and then chain scatterlists sg_plaintext_data, sg_encrypted_data respectively. Rather than using chained scatterlists for plaintext and encrypted data in aead_req, it is efficient to store aad_space in sg_encrypted_data and sg_plaintext_data itself in the first index and get rid of sg_aead_in, sg_aead_in and further chaining. This requires increasing size of sg_encrypted_data & sg_plaintext_data arrarys by 1 to accommodate entry for aad_space. The code which uses sg_encrypted_data and sg_plaintext_data has been modified to skip first index as it points to aad_space. Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- include/net/tls.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 1615fb5ea114..262420cdad10 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -101,13 +101,11 @@ struct tls_rec { struct list_head list; int tx_ready; int tx_flags; - struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS]; - struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS]; /* AAD | sg_plaintext_data | sg_tag */ - struct scatterlist sg_aead_in[2]; + struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS + 1]; /* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */ - struct scatterlist sg_aead_out[2]; + struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS + 1]; unsigned int sg_plaintext_size; unsigned int sg_encrypted_size; -- cgit v1.2.3 From 8bad74f9840f87661f20ced3dc80c84ab4fd55a1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Sep 2018 14:45:36 +0000 Subject: bpf: extend cgroup bpf core to allow multiple cgroup storage types In order to introduce per-cpu cgroup storage, let's generalize bpf cgroup core to support multiple cgroup storage types. Potentially, per-node cgroup storage can be added later. This commit is mostly a formal change that replaces cgroup_storage pointer with a array of cgroup_storage pointers. It doesn't actually introduce a new storage type, it will be done later. Each bpf program is now able to have one cgroup storage of each type. Signed-off-by: Roman Gushchin Acked-by: Song Liu Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 38 ++++++++++++++++++++++++++++---------- include/linux/bpf.h | 11 +++++++++-- 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index f91b0f8ff3a9..e9871b012dac 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -2,6 +2,7 @@ #ifndef _BPF_CGROUP_H #define _BPF_CGROUP_H +#include #include #include #include @@ -22,7 +23,10 @@ struct bpf_cgroup_storage; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) -DECLARE_PER_CPU(void*, bpf_cgroup_storage); +DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); + +#define for_each_cgroup_storage_type(stype) \ + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) struct bpf_cgroup_storage_map; @@ -43,7 +47,7 @@ struct bpf_cgroup_storage { struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; - struct bpf_cgroup_storage *storage; + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; struct bpf_prog_array; @@ -101,18 +105,29 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) +static inline enum bpf_cgroup_storage_type cgroup_storage_type( + struct bpf_map *map) { + return BPF_CGROUP_STORAGE_SHARED; +} + +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage + *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) +{ + enum bpf_cgroup_storage_type stype; struct bpf_storage_buffer *buf; - if (!storage) - return; + for_each_cgroup_storage_type(stype) { + if (!storage[stype]) + continue; - buf = READ_ONCE(storage->buf); - this_cpu_write(bpf_cgroup_storage, &buf->data[0]); + buf = READ_ONCE(storage[stype]->buf); + this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]); + } } -struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog); +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, + enum bpf_cgroup_storage_type stype); void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, struct cgroup *cgroup, @@ -265,13 +280,14 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {} +static inline void bpf_cgroup_storage_set( + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map) { return 0; } static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map) {} static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( - struct bpf_prog *prog) { return 0; } + struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; } static inline void bpf_cgroup_storage_free( struct bpf_cgroup_storage *storage) {} @@ -293,6 +309,8 @@ static inline void bpf_cgroup_storage_free( #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) +#define for_each_cgroup_storage_type(stype) for (; false; ) + #endif /* CONFIG_CGROUP_BPF */ #endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 988a00797bcd..b457fbe7b70b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -272,6 +272,13 @@ struct bpf_prog_offload { u32 jited_len; }; +enum bpf_cgroup_storage_type { + BPF_CGROUP_STORAGE_SHARED, + __BPF_CGROUP_STORAGE_MAX +}; + +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -289,7 +296,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ - struct bpf_map *cgroup_storage; + struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY void *security; @@ -358,7 +365,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, */ struct bpf_prog_array_item { struct bpf_prog *prog; - struct bpf_cgroup_storage *cgroup_storage; + struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; struct bpf_prog_array { -- cgit v1.2.3 From f294b37ec7b24a574884cd157497a3748081c0f0 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Sep 2018 14:45:40 +0000 Subject: bpf: rework cgroup storage pointer passing To simplify the following introduction of per-cpu cgroup storage, let's rework a bit a mechanism of passing a pointer to a cgroup storage into the bpf_get_local_storage(). Let's save a pointer to the corresponding bpf_cgroup_storage structure, instead of a pointer to the actual buffer. It will help us to handle per-cpu storage later, which has a different way of accessing to the actual data. Signed-off-by: Roman Gushchin Acked-by: Song Liu Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index e9871b012dac..7e0c9a1d48b7 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -23,7 +23,8 @@ struct bpf_cgroup_storage; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) -DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); +DECLARE_PER_CPU(struct bpf_cgroup_storage*, + bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); #define for_each_cgroup_storage_type(stype) \ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) @@ -115,15 +116,9 @@ static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { enum bpf_cgroup_storage_type stype; - struct bpf_storage_buffer *buf; - - for_each_cgroup_storage_type(stype) { - if (!storage[stype]) - continue; - buf = READ_ONCE(storage[stype]->buf); - this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]); - } + for_each_cgroup_storage_type(stype) + this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); } struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, -- cgit v1.2.3 From b741f1630346defcbc8cc60f1a2bdae8b3b0036f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Sep 2018 14:45:43 +0000 Subject: bpf: introduce per-cpu cgroup local storage This commit introduced per-cpu cgroup local storage. Per-cpu cgroup local storage is very similar to simple cgroup storage (let's call it shared), except all the data is per-cpu. The main goal of per-cpu variant is to implement super fast counters (e.g. packet counters), which don't require neither lookups, neither atomic operations. >From userspace's point of view, accessing a per-cpu cgroup storage is similar to other per-cpu map types (e.g. per-cpu hashmaps and arrays). Writing to a per-cpu cgroup storage is not atomic, but is performed by copying longs, so some minimal atomicity is here, exactly as with other per-cpu maps. Signed-off-by: Roman Gushchin Cc: Daniel Borkmann Cc: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 20 +++++++++++++++++++- include/linux/bpf.h | 1 + include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 1 + 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 7e0c9a1d48b7..588dd5f0bd85 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -37,7 +37,10 @@ struct bpf_storage_buffer { }; struct bpf_cgroup_storage { - struct bpf_storage_buffer *buf; + union { + struct bpf_storage_buffer *buf; + void __percpu *percpu_buf; + }; struct bpf_cgroup_storage_map *map; struct bpf_cgroup_storage_key key; struct list_head list; @@ -109,6 +112,9 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { + if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) + return BPF_CGROUP_STORAGE_PERCPU; + return BPF_CGROUP_STORAGE_SHARED; } @@ -131,6 +137,10 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map); void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); +int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, + void *value, u64 flags); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -285,6 +295,14 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; } static inline void bpf_cgroup_storage_free( struct bpf_cgroup_storage *storage) {} +static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, + void *value) { + return 0; +} +static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, + void *key, void *value, u64 flags) { + return 0; +} #define cgroup_bpf_enabled (0) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b457fbe7b70b..018299a595c8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -274,6 +274,7 @@ struct bpf_prog_offload { enum bpf_cgroup_storage_type { BPF_CGROUP_STORAGE_SHARED, + BPF_CGROUP_STORAGE_PERCPU, __BPF_CGROUP_STORAGE_MAX }; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index c9bd6fb765b0..5432f4c9f50e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -43,6 +43,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) #endif #ifdef CONFIG_CGROUP_BPF BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa5ccd2385ed..e2070d819e04 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -127,6 +127,7 @@ enum bpf_map_type { BPF_MAP_TYPE_SOCKHASH, BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, }; enum bpf_prog_type { -- cgit v1.2.3 From 59c9d35ea9cd73c3a55642ec9a0097770baccb93 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Wed, 5 Sep 2018 17:06:37 +0300 Subject: net/mlx5: Cache the system image guid The system image guid is a read-only field which is used by the TC offloads code to determine if two mlx5 devices belong to the same ASIC while adding flows. Read this once and save it on the core device rather than querying each time an offloaded flow is added. Signed-off-by: Alaa Hleihel Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/vport.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ed73b51f6697..26a92462f4ce 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -838,6 +838,7 @@ struct mlx5_core_dev { u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; } caps; + u64 sys_image_guid; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; enum mlx5_device_state state; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 7e7c6dfcfb09..9c694808c212 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -121,4 +121,6 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status); int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, struct mlx5_core_dev *port_mdev); int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); + +u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From b31cdffa2329fe330cd304ca26c250dd1520fb0a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:09 +0200 Subject: net: phy: Move linkmode helpers to somewhere public phylink has some useful helpers to working with linkmode bitmaps. Move them to there own header so other code can use them. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/linkmode.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mii.h | 1 + include/linux/phy.h | 1 + 3 files changed, 69 insertions(+) create mode 100644 include/linux/linkmode.h (limited to 'include') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h new file mode 100644 index 000000000000..014fb86c7114 --- /dev/null +++ b/include/linux/linkmode.h @@ -0,0 +1,67 @@ +#ifndef __LINKMODE_H +#define __LINKMODE_H + +#include +#include +#include + +static inline void linkmode_zero(unsigned long *dst) +{ + bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline void linkmode_copy(unsigned long *dst, const unsigned long *src) +{ + bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline void linkmode_and(unsigned long *dst, const unsigned long *a, + const unsigned long *b) +{ + bitmap_and(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline void linkmode_or(unsigned long *dst, const unsigned long *a, + const unsigned long *b) +{ + bitmap_or(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline bool linkmode_empty(const unsigned long *src) +{ + return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2) +{ + return bitmap_andnot(dst, src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) +{ + __set_bit(nr, addr); +} + +static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) +{ + __clear_bit(nr, addr); +} + +static inline void linkmode_change_bit(int nr, volatile unsigned long *addr) +{ + __change_bit(nr, addr); +} + +static inline int linkmode_test_bit(int nr, volatile unsigned long *addr) +{ + return test_bit(nr, addr); +} + +static inline int linkmode_equal(const unsigned long *src1, + const unsigned long *src2) +{ + return bitmap_equal(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +#endif /* __LINKMODE_H */ diff --git a/include/linux/mii.h b/include/linux/mii.h index 55000ee5c6ad..567047ef0309 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -10,6 +10,7 @@ #include +#include #include struct ethtool_cmd; diff --git a/include/linux/phy.h b/include/linux/phy.h index 192a1fa0c73b..d24cc46748e2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From ab2a605fa621ecf4ec26603a237822f7772cfa28 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:10 +0200 Subject: net: phy: Add phydev_warn() Not all new style LINK_MODE bits can be converted into old style SUPPORTED bits. We need to warn when such a conversion is attempted. Add a helper for this. Convert all pr_warn() calls to phydev_warn() where possible. Signed-off-by: Andrew Lunn Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index d24cc46748e2..0ab9f89773fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -968,6 +968,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value) #define phydev_err(_phydev, format, args...) \ dev_err(&_phydev->mdio.dev, format, ##args) +#define phydev_warn(_phydev, format, args...) \ + dev_warn(&_phydev->mdio.dev, format, ##args) + #define phydev_dbg(_phydev, format, args...) \ dev_dbg(&_phydev->mdio.dev, format, ##args) -- cgit v1.2.3 From c4fabb8b3c0d724eb93dabaf346b0dd8a8be7118 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:11 +0200 Subject: net: phy: Add phydev_info() Add phydev_info() and make use of it within the phy drivers and core code. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 0ab9f89773fd..0f6e7bf5e9c5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -968,6 +968,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value) #define phydev_err(_phydev, format, args...) \ dev_err(&_phydev->mdio.dev, format, ##args) +#define phydev_info(_phydev, format, args...) \ + dev_info(&_phydev->mdio.dev, format, ##args) + #define phydev_warn(_phydev, format, args...) \ dev_warn(&_phydev->mdio.dev, format, ##args) -- cgit v1.2.3 From edc7ccbbcf32b97c7d26cd556f364eb4f22c4285 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:12 +0200 Subject: net: phy: Add helper to convert MII ADV register to a linkmode The phy_mii_ioctl can be used to write a value into the MII_ADVERTISE register in the PHY. Since this changes the state of the PHY, we need to make the same change to phydev->advertising. Add a helper which can convert the register value to a linkmode. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/mii.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index 567047ef0309..8c7da9473ad9 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -303,6 +303,37 @@ static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) return result | mii_adv_to_ethtool_adv_x(lpa); } +/** + * mii_adv_to_linkmode_adv_t + * @advertising:pointer to destination link mode. + * @adv: value of the MII_ADVERTISE register + * + * A small helper function that translates MII_ADVERTISE bits + * to linkmode advertisement settings. + */ +static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, + u32 adv) +{ + linkmode_zero(advertising); + + if (adv & ADVERTISE_10HALF) + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + advertising); + if (adv & ADVERTISE_10FULL) + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + advertising); + if (adv & ADVERTISE_100HALF) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + advertising); + if (adv & ADVERTISE_100FULL) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + advertising); + if (adv & ADVERTISE_PAUSE_CAP) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising); + if (adv & ADVERTISE_PAUSE_ASYM) + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising); +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From 5f991f7bddc991ecc3c8a009ffd76fccff4661c7 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:13 +0200 Subject: net: phy: Add helper for advertise to lcl value Add a helper to convert the local advertising to an LCL capabilities, which is then used to resolve pause flow control settings. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/mii.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index 8c7da9473ad9..9ed49c8261d0 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -334,6 +334,25 @@ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising); } +/** + * ethtool_adv_to_lcl_adv_t + * @advertising:pointer to ethtool advertising + * + * A small helper function that translates ethtool advertising to LVL + * pause capabilities. + */ +static inline u32 ethtool_adv_to_lcl_adv_t(u32 advertising) +{ + u32 lcl_adv = 0; + + if (advertising & ADVERTISED_Pause) + lcl_adv |= ADVERTISE_PAUSE_CAP; + if (advertising & ADVERTISED_Asym_Pause) + lcl_adv |= ADVERTISE_PAUSE_ASYM; + + return lcl_adv; +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From f954a04ea18ebfcba1cd2756eaee59eb4978a20e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:14 +0200 Subject: net: phy: Add limkmode equivalents to some of the MII ethtool helpers Add helpers which take a linkmode rather than a u32 ethtool for advertising settings. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/mii.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include') diff --git a/include/linux/mii.h b/include/linux/mii.h index 9ed49c8261d0..2da85b02e1c0 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -132,6 +132,34 @@ static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv) return result; } +/** + * linkmode_adv_to_mii_adv_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * MII_ADVERTISE register. + */ +static inline u32 linkmode_adv_to_mii_adv_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, advertising)) + result |= ADVERTISE_10HALF; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, advertising)) + result |= ADVERTISE_10FULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, advertising)) + result |= ADVERTISE_100HALF; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, advertising)) + result |= ADVERTISE_100FULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising)) + result |= ADVERTISE_PAUSE_CAP; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising)) + result |= ADVERTISE_PAUSE_ASYM; + + return result; +} + /** * mii_adv_to_ethtool_adv_t * @adv: value of the MII_ADVERTISE register @@ -179,6 +207,28 @@ static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv) return result; } +/** + * linkmode_adv_to_mii_ctrl1000_t + * advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * MII_CTRL1000 register when in 1000T mode. + */ +static inline u32 linkmode_adv_to_mii_ctrl1000_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + advertising)) + result |= ADVERTISE_1000HALF; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + advertising)) + result |= ADVERTISE_1000FULL; + + return result; +} + /** * mii_ctrl1000_to_ethtool_adv_t * @adv: value of the MII_CTRL1000 register -- cgit v1.2.3 From 719655a149715f26fc4de904fe0aa83068bd5b9e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:16 +0200 Subject: net: phy: Replace phy driver features u32 with link_mode bitmap This is one step in allowing phylib to make use of link_mode bitmaps, instead of u32 for supported and advertised features. Convert the phy drivers to use bitmaps to indicates the features they support. Build bitmap equivalents of the u32 values at runtime, and have the drivers point to the appropriate bitmap. These bitmaps are shared, and we don't want a driver to modify them. So mark them __ro_after_init. Within phylib, the features bitmap is currently turned back into a u32. This will be removed once the whole of phylib, and the drivers are converted to use bitmaps. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/linkmode.h | 9 +++++++++ include/linux/phy.h | 24 ++++++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 014fb86c7114..22443d7fb5cd 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -43,6 +43,15 @@ static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) __set_bit(nr, addr); } +static inline void linkmode_set_bit_array(const int *array, int array_size, + unsigned long *addr) +{ + int i; + + for (i = 0; i < array_size; i++) + linkmode_set_bit(array[i], addr); +} + static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) { __clear_bit(nr, addr); diff --git a/include/linux/phy.h b/include/linux/phy.h index 0f6e7bf5e9c5..dff51dd36e52 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -42,13 +42,21 @@ #define PHY_1000BT_FEATURES (SUPPORTED_1000baseT_Half | \ SUPPORTED_1000baseT_Full) -#define PHY_BASIC_FEATURES (PHY_10BT_FEATURES | \ - PHY_100BT_FEATURES | \ - PHY_DEFAULT_FEATURES) - -#define PHY_GBIT_FEATURES (PHY_BASIC_FEATURES | \ - PHY_1000BT_FEATURES) - +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; + +#define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) +#define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) +#define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features) +#define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) +#define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) +#define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) +#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) /* * Set phydev->irq to PHY_POLL if interrupts are not supported, @@ -510,7 +518,7 @@ struct phy_driver { u32 phy_id; char *name; u32 phy_id_mask; - u32 features; + const unsigned long * const features; u32 flags; const void *driver_data; -- cgit v1.2.3 From 3e48be05f3c7eb6f6126939e9d957903c5cfeee5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Sep 2018 11:28:35 +0200 Subject: netlink: add attribute range validation to policy Without further bloating the policy structs, we can overload the `validation_data' pointer with a struct of s16 min, max and use those to validate ranges in NLA_{U,S}{8,16,32,64} attributes. It may sound strange to validate NLA_U32 with a s16 max, but in many cases NLA_U32 is used for enums etc. since there's no size benefit in using a smaller attribute width anyway, due to netlink attribute alignment; in cases like that it's still useful, particularly when the attribute really transports an enum value. Doing so lets us remove quite a bit of validation code, if we can be sure that these attributes aren't used by userspace in places where they're ignored today. To achieve all this, split the 'type' field and introduce a new 'validation_type' field which indicates what further validation (beyond the validation prescribed by the type of the attribute) is done. This currently allows for no further validation (the default), as well as min, max and range checks. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 3698ca8ff92c..d34ceeba82a8 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -188,9 +188,19 @@ enum { #define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) +enum nla_policy_validation { + NLA_VALIDATE_NONE, + NLA_VALIDATE_RANGE, + NLA_VALIDATE_MIN, + NLA_VALIDATE_MAX, +}; + /** * struct nla_policy - attribute validation policy * @type: Type of attribute or NLA_UNSPEC + * @validation_type: type of attribute validation done in addition to + * type-specific validation (e.g. range), see + * &enum nla_policy_validation * @len: Type specific length of payload * * Policies are defined as arrays of this struct, the array must be @@ -238,7 +248,26 @@ enum { * nested attributes directly inside, while an array has * the nested attributes at another level down and the * attributes directly in the nesting don't matter. - * All other Unused + * All other Unused - but note that it's a union + * + * Meaning of `min' and `max' fields, use via NLA_POLICY_MIN, NLA_POLICY_MAX + * and NLA_POLICY_RANGE: + * NLA_U8, + * NLA_U16, + * NLA_U32, + * NLA_U64, + * NLA_S8, + * NLA_S16, + * NLA_S32, + * NLA_S64 These are used depending on the validation_type + * field, if that is min/max/range then the minimum, + * maximum and both are used (respectively) to check + * the value of the integer attribute. + * Note that in the interest of code simplicity and + * struct size both limits are s16, so you cannot + * enforce a range that doesn't fall within the range + * of s16 - do that as usual in the code instead. + * All other Unused - but note that it's a union * * Example: * static const struct nla_policy my_policy[ATTR_MAX+1] = { @@ -249,9 +278,15 @@ enum { * }; */ struct nla_policy { - u16 type; + u8 type; + u8 validation_type; u16 len; - const void *validation_data; + union { + const void *validation_data; + struct { + s16 min, max; + }; + }; }; #define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len } @@ -266,6 +301,32 @@ struct nla_policy { #define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \ { .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr } +#define __NLA_ENSURE(condition) (sizeof(char[1 - 2*!(condition)]) - 1) +#define NLA_ENSURE_INT_TYPE(tp) \ + (__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 || \ + tp == NLA_S16 || tp == NLA_U16 || \ + tp == NLA_S32 || tp == NLA_U32 || \ + tp == NLA_S64 || tp == NLA_U64) + tp) + +#define NLA_POLICY_RANGE(tp, _min, _max) { \ + .type = NLA_ENSURE_INT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_RANGE, \ + .min = _min, \ + .max = _max \ +} + +#define NLA_POLICY_MIN(tp, _min) { \ + .type = NLA_ENSURE_INT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_MIN, \ + .min = _min, \ +} + +#define NLA_POLICY_MAX(tp, _max) { \ + .type = NLA_ENSURE_INT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_MAX, \ + .max = _max, \ +} + /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request -- cgit v1.2.3 From 33188bd6430ef06d206ae4fda2cc92f14f16fd20 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Sep 2018 11:28:36 +0200 Subject: netlink: add validation function to policy Add the ability to have an arbitrary validation function attached to a netlink policy that doesn't already use the validation_data pointer in another way. This can be useful to validate for example the content of a binary attribute, like in nl80211 the "(information) elements", which must be valid streams of "u8 type, u8 length, u8 value[length]". Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index d34ceeba82a8..6a106ef5ca56 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -193,13 +193,14 @@ enum nla_policy_validation { NLA_VALIDATE_RANGE, NLA_VALIDATE_MIN, NLA_VALIDATE_MAX, + NLA_VALIDATE_FUNCTION, }; /** * struct nla_policy - attribute validation policy * @type: Type of attribute or NLA_UNSPEC * @validation_type: type of attribute validation done in addition to - * type-specific validation (e.g. range), see + * type-specific validation (e.g. range, function call), see * &enum nla_policy_validation * @len: Type specific length of payload * @@ -269,6 +270,13 @@ enum nla_policy_validation { * of s16 - do that as usual in the code instead. * All other Unused - but note that it's a union * + * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: + * NLA_BINARY Validation function called for the attribute, + * not compatible with use of the validation_data + * as in NLA_BITFIELD32, NLA_REJECT, NLA_NESTED and + * NLA_NESTED_ARRAY. + * All other Unused - but note that it's a union + * * Example: * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, @@ -286,6 +294,8 @@ struct nla_policy { struct { s16 min, max; }; + int (*validate)(const struct nlattr *attr, + struct netlink_ext_ack *extack); }; }; @@ -307,6 +317,11 @@ struct nla_policy { tp == NLA_S16 || tp == NLA_U16 || \ tp == NLA_S32 || tp == NLA_U32 || \ tp == NLA_S64 || tp == NLA_U64) + tp) +#define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ + (__NLA_ENSURE(tp != NLA_BITFIELD32 && \ + tp != NLA_REJECT && \ + tp != NLA_NESTED && \ + tp != NLA_NESTED_ARRAY) + tp) #define NLA_POLICY_RANGE(tp, _min, _max) { \ .type = NLA_ENSURE_INT_TYPE(tp), \ @@ -327,6 +342,13 @@ struct nla_policy { .max = _max, \ } +#define NLA_POLICY_VALIDATE_FN(tp, fn, ...) { \ + .type = NLA_ENSURE_NO_VALIDATION_PTR(tp), \ + .validation_type = NLA_VALIDATE_FUNCTION, \ + .validate = fn, \ + .len = __VA_ARGS__ + 0, \ +} + /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request -- cgit v1.2.3 From 9f2959b6b52d43326b2f6a0e0d7ffe6f4fc3b5ca Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 28 Sep 2018 08:51:09 +0200 Subject: net: phy: improve handling delayed work Using mod_delayed_work() allows to simplify handling delayed work and removes the need for the sync parameter in phy_trigger_machine(). Also introduce a helper phy_queue_state_machine() to encapsulate the low-level delayed work calls. No functional change intended. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index dff51dd36e52..3ea87f774a76 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1054,7 +1054,7 @@ void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -void phy_trigger_machine(struct phy_device *phydev, bool sync); +void phy_trigger_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd); -- cgit v1.2.3 From fb420d5d91c1274d5966917725e71f27ed092a85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Sep 2018 10:28:44 -0700 Subject: tcp/fq: move back to CLOCK_MONOTONIC In the recent TCP/EDT patch series, I switched TCP and sch_fq clocks from MONOTONIC to TAI, in order to meet the choice done earlier for sch_etf packet scheduler. But sure enough, this broke some setups were the TAI clock jumps forward (by almost 50 year...), as reported by Leonard Crestez. If we want to converge later, we'll probably need to add an skb field to differentiate the clock bases, or a socket option. In the meantime, an UDP application will need to use CLOCK_MONOTONIC base for its SCM_TXTIME timestamps if using fq packet scheduler. Fixes: 72b0094f9182 ("tcp: switch tcp_clock_ns() to CLOCK_TAI base") Fixes: 142537e41923 ("net_sched: sch_fq: switch to CLOCK_TAI") Fixes: fd2bca2aa789 ("tcp: switch internal pacing timer to CLOCK_TAI") Signed-off-by: Eric Dumazet Reported-by: Leonard Crestez Tested-by: Leonard Crestez Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index ff15d8e0d525..0d2929223c70 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -732,7 +732,7 @@ void tcp_send_window_probe(struct sock *sk); static inline u64 tcp_clock_ns(void) { - return ktime_get_tai_ns(); + return ktime_get_ns(); } static inline u64 tcp_clock_us(void) -- cgit v1.2.3 From 81e54d08d9d845053111f30045a93f3eb1c3ca96 Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Chitrapu Date: Thu, 20 Sep 2018 17:30:09 -0700 Subject: cfg80211: support FTM responder configuration/statistics Allow userspace to enable fine timing measurement responder functionality with configurable lci/civic parameters in AP mode. This can be done at AP start or changing beacon parameters. A new EXT_FEATURE flag is introduced for drivers to advertise the capability. Also nl80211 API support for retrieving statistics is added. Signed-off-by: Johannes Berg Signed-off-by: Pradeep Kumar Chitrapu [remove unused cfg80211_ftm_responder_params, clarify docs, move validation into policy] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 52 +++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 90 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9f3ed79c39d7..deb313105014 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -775,6 +775,12 @@ struct cfg80211_crypto_settings { * @assocresp_ies_len: length of assocresp_ies in octets * @probe_resp_len: length of probe response template (@probe_resp) * @probe_resp: probe response template (AP mode only) + * @ftm_responder: enable FTM responder functionality; -1 for no change + * (which also implies no change in LCI/civic location data) + * @lci: LCI subelement content + * @civicloc: Civic location subelement content + * @lci_len: LCI data length + * @civicloc_len: Civic location data length */ struct cfg80211_beacon_data { const u8 *head, *tail; @@ -782,12 +788,17 @@ struct cfg80211_beacon_data { const u8 *proberesp_ies; const u8 *assocresp_ies; const u8 *probe_resp; + const u8 *lci; + const u8 *civicloc; + s8 ftm_responder; size_t head_len, tail_len; size_t beacon_ies_len; size_t proberesp_ies_len; size_t assocresp_ies_len; size_t probe_resp_len; + size_t lci_len; + size_t civicloc_len; }; struct mac_address { @@ -2796,6 +2807,40 @@ struct cfg80211_external_auth_params { u16 status; }; +/** + * cfg80211_ftm_responder_stats - FTM responder statistics + * + * @filled: bitflag of flags using the bits of &enum nl80211_ftm_stats to + * indicate the relevant values in this struct for them + * @success_num: number of FTM sessions in which all frames were successfully + * answered + * @partial_num: number of FTM sessions in which part of frames were + * successfully answered + * @failed_num: number of failed FTM sessions + * @asap_num: number of ASAP FTM sessions + * @non_asap_num: number of non-ASAP FTM sessions + * @total_duration_ms: total sessions durations - gives an indication + * of how much time the responder was busy + * @unknown_triggers_num: number of unknown FTM triggers - triggers from + * initiators that didn't finish successfully the negotiation phase with + * the responder + * @reschedule_requests_num: number of FTM reschedule requests - initiator asks + * for a new scheduling although it already has scheduled FTM slot + * @out_of_window_triggers_num: total FTM triggers out of scheduled window + */ +struct cfg80211_ftm_responder_stats { + u32 filled; + u32 success_num; + u32 partial_num; + u32 failed_num; + u32 asap_num; + u32 non_asap_num; + u64 total_duration_ms; + u32 unknown_triggers_num; + u32 reschedule_requests_num; + u32 out_of_window_triggers_num; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -3128,6 +3173,9 @@ struct cfg80211_external_auth_params { * * @tx_control_port: TX a control port frame (EAPoL). The noencrypt parameter * tells the driver that the frame should not be encrypted. + * + * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available. + * Statistics should be cumulative, currently no way to reset is provided. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3433,6 +3481,10 @@ struct cfg80211_ops { const u8 *buf, size_t len, const u8 *dest, const __be16 proto, const bool noencrypt); + + int (*get_ftm_responder_stats)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_ftm_responder_stats *ftm_stats); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index cfc94178d608..dc6d5a1ef470 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1033,6 +1033,9 @@ * %NL80211_ATTR_CHANNEL_WIDTH,%NL80211_ATTR_NSS attributes with its * address(specified in %NL80211_ATTR_MAC). * + * @NL80211_CMD_GET_FTM_RESPONDER_STATS: Retrieve FTM responder statistics, in + * the %NL80211_ATTR_FTM_RESPONDER_STATS attribute. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1245,6 +1248,8 @@ enum nl80211_commands { NL80211_CMD_CONTROL_PORT_FRAME, + NL80211_CMD_GET_FTM_RESPONDER_STATS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2241,6 +2246,14 @@ enum nl80211_commands { * association request when used with NL80211_CMD_NEW_STATION). Can be set * only if %NL80211_STA_FLAG_WME is set. * + * @NL80211_ATTR_FTM_RESPONDER: nested attribute which user-space can include + * in %NL80211_CMD_START_AP or %NL80211_CMD_SET_BEACON for fine timing + * measurement (FTM) responder functionality and containing parameters as + * possible, see &enum nl80211_ftm_responder_attr + * + * @NL80211_ATTR_FTM_RESPONDER_STATS: Nested attribute with FTM responder + * statistics, see &enum nl80211_ftm_responder_stats. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2682,6 +2695,10 @@ enum nl80211_attrs { NL80211_ATTR_HE_CAPABILITY, + NL80211_ATTR_FTM_RESPONDER, + + NL80211_ATTR_FTM_RESPONDER_STATS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5225,6 +5242,8 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT: Driver/device can omit all data * except for supported rates from the probe request content if requested * by the %NL80211_SCAN_FLAG_MIN_PREQ_CONTENT flag. + * @NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER: Driver supports enabling fine + * timing measurement responder role. * * @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0: Driver/device confirm that they are * able to rekey an in-use key correctly. Userspace must not rekey PTK keys @@ -5269,6 +5288,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCAN_RANDOM_SN, NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT, NL80211_EXT_FEATURE_CAN_REPLACE_PTK0, + NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -5808,4 +5828,74 @@ enum nl80211_external_auth_action { NL80211_EXTERNAL_AUTH_ABORT, }; +/** + * enum nl80211_ftm_responder_attributes - fine timing measurement + * responder attributes + * @__NL80211_FTM_RESP_ATTR_INVALID: Invalid + * @NL80211_FTM_RESP_ATTR_ENABLED: FTM responder is enabled + * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element + * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10) + * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element + * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13) + * @__NL80211_FTM_RESP_ATTR_LAST: Internal + * @NL80211_FTM_RESP_ATTR_MAX: highest FTM responder attribute. + */ +enum nl80211_ftm_responder_attributes { + __NL80211_FTM_RESP_ATTR_INVALID, + + NL80211_FTM_RESP_ATTR_ENABLED, + NL80211_FTM_RESP_ATTR_LCI, + NL80211_FTM_RESP_ATTR_CIVICLOC, + + /* keep last */ + __NL80211_FTM_RESP_ATTR_LAST, + NL80211_FTM_RESP_ATTR_MAX = __NL80211_FTM_RESP_ATTR_LAST - 1, +}; + +/* + * enum nl80211_ftm_responder_stats - FTM responder statistics + * + * These attribute types are used with %NL80211_ATTR_FTM_RESPONDER_STATS + * when getting FTM responder statistics. + * + * @__NL80211_FTM_STATS_INVALID: attribute number 0 is reserved + * @NL80211_FTM_STATS_SUCCESS_NUM: number of FTM sessions in which all frames + * were ssfully answered (u32) + * @NL80211_FTM_STATS_PARTIAL_NUM: number of FTM sessions in which part of the + * frames were successfully answered (u32) + * @NL80211_FTM_STATS_FAILED_NUM: number of failed FTM sessions (u32) + * @NL80211_FTM_STATS_ASAP_NUM: number of ASAP sessions (u32) + * @NL80211_FTM_STATS_NON_ASAP_NUM: number of non-ASAP sessions (u32) + * @NL80211_FTM_STATS_TOTAL_DURATION_MSEC: total sessions durations - gives an + * indication of how much time the responder was busy (u64, msec) + * @NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM: number of unknown FTM triggers - + * triggers from initiators that didn't finish successfully the negotiation + * phase with the responder (u32) + * @NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM: number of FTM reschedule requests + * - initiator asks for a new scheduling although it already has scheduled + * FTM slot (u32) + * @NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM: number of FTM triggers out of + * scheduled window (u32) + * @NL80211_FTM_STATS_PAD: used for padding, ignore + * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal + * @NL80211_FTM_STATS_MAX: highest possible FTM responder stats attribute + */ +enum nl80211_ftm_responder_stats { + __NL80211_FTM_STATS_INVALID, + NL80211_FTM_STATS_SUCCESS_NUM, + NL80211_FTM_STATS_PARTIAL_NUM, + NL80211_FTM_STATS_FAILED_NUM, + NL80211_FTM_STATS_ASAP_NUM, + NL80211_FTM_STATS_NON_ASAP_NUM, + NL80211_FTM_STATS_TOTAL_DURATION_MSEC, + NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM, + NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM, + NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM, + NL80211_FTM_STATS_PAD, + + /* keep last */ + __NL80211_FTM_STATS_AFTER_LAST, + NL80211_FTM_STATS_MAX = __NL80211_FTM_STATS_AFTER_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From b60ad3485106b5845113e7a2745abb7e64b15d6d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 1 Oct 2018 11:52:07 +0200 Subject: cfg80211: move cookie_counter out of wiphy There's no reason for drivers to be able to access the cfg80211 internal cookie counter; move it out of the wiphy into the rdev structure. While at it, also make it never assign 0 as a cookie (we consider that invalid in some places), and warn if we manage to do that for some reason (wrapping is not likely to happen with a u64.) Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index deb313105014..8f5ee2c2da04 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4012,7 +4012,6 @@ struct wiphy_iftype_ext_capab { * by the driver in the .connect() callback. The bit position maps to the * attribute indices defined in &enum nl80211_bss_select_attr. * - * @cookie_counter: unique generic cookie counter, used to identify objects. * @nan_supported_bands: bands supported by the device in NAN mode, a * bitmap of &enum nl80211_band values. For instance, for * NL80211_BAND_2GHZ, bit 0 would be set @@ -4151,8 +4150,6 @@ struct wiphy { u32 bss_select_support; - u64 cookie_counter; - u8 nan_supported_bands; u32 txq_limit; -- cgit v1.2.3 From cc16567e5a8a7bb9439ef61ab80069acdd33f76f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 2 Oct 2018 11:03:40 +0200 Subject: net: drop unused skb_append_datato_frags() This helper is unused since commit 988cf74deb45 ("inet: Stop generating UFO packets.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 87e29710373f..119d092c6b13 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1082,11 +1082,6 @@ static inline int skb_pad(struct sk_buff *skb, int pad) } #define dev_kfree_skb(a) consume_skb(a) -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int getfrag(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb), - void *from, int length); - int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size); -- cgit v1.2.3 From 5bf0961cc6a180c077793f2615a8fd842c655876 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 2 Oct 2018 06:16:11 -0700 Subject: qed: Add driver support for 20G link speed. Add driver support for configuring/reading the 20G link speed. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8cd34645e892..dee3c9c744f7 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -670,10 +670,11 @@ enum qed_link_mode_bits { QED_LM_1000baseT_Half_BIT = BIT(4), QED_LM_1000baseT_Full_BIT = BIT(5), QED_LM_10000baseKR_Full_BIT = BIT(6), - QED_LM_25000baseKR_Full_BIT = BIT(7), - QED_LM_40000baseLR4_Full_BIT = BIT(8), - QED_LM_50000baseKR2_Full_BIT = BIT(9), - QED_LM_100000baseKR4_Full_BIT = BIT(10), + QED_LM_20000baseKR2_Full_BIT = BIT(7), + QED_LM_25000baseKR_Full_BIT = BIT(8), + QED_LM_40000baseLR4_Full_BIT = BIT(9), + QED_LM_50000baseKR2_Full_BIT = BIT(10), + QED_LM_100000baseKR4_Full_BIT = BIT(11), QED_LM_COUNT = 11 }; -- cgit v1.2.3 From d456336d164886d9339aaa112d6595e1c142f8bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sat, 29 Sep 2018 23:44:50 -0700 Subject: net: remove 1 always zero parameter from ip6_redirect_no_header() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (the parameter in question is mark) Signed-off-by: Maciej Żenczykowski Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7b9c82de11cc..cef186dbd2ce 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -165,8 +165,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, kuid_t uid); -void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, - u32 mark); +void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); struct netlink_callback; -- cgit v1.2.3 From f3709f69b7c5cba6323cc03c29b64293b93be817 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:29 -0700 Subject: bpf: Add iterator for spilled registers Add this iterator for spilled registers, it concentrates the details of how to get the current frame's spilled registers into a single macro while clarifying the intention of the code which is calling the macro. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b42b60a83e19..d0e7f97e8b60 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -131,6 +131,17 @@ struct bpf_verifier_state { u32 curframe; }; +#define bpf_get_spilled_reg(slot, frame) \ + (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ + (frame->stack[slot].slot_type[0] == STACK_SPILL)) \ + ? &frame->stack[slot].spilled_ptr : NULL) + +/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ +#define bpf_for_each_spilled_reg(iter, frame, reg) \ + for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ + iter < frame->allocated_stack / BPF_REG_SIZE; \ + iter++, reg = bpf_get_spilled_reg(iter, frame)) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; -- cgit v1.2.3 From c64b7983288e636356f7f5f652de4813e1cfedac Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:33 -0700 Subject: bpf: Add PTR_TO_SOCKET verifier type Teach the verifier a little bit about a new type of pointer, a PTR_TO_SOCKET. This pointer type is accessed from BPF through the 'struct bpf_sock' structure. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 34 ++++++++++++++++++++++++++++++++++ include/linux/bpf_verifier.h | 2 ++ 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 018299a595c8..027697b6a22f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -154,6 +154,7 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ }; /* type of values returned from helper functions */ @@ -162,6 +163,7 @@ enum bpf_return_type { RET_VOID, /* function doesn't return anything */ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ + RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -213,6 +215,8 @@ enum bpf_reg_type { PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ + PTR_TO_SOCKET, /* reg points to struct bpf_sock */ + PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -343,6 +347,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void); typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, + const struct bpf_insn *src, + struct bpf_insn *dst, + struct bpf_prog *prog, + u32 *target_size); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); @@ -836,4 +845,29 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +#if defined(CONFIG_NET) +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); +#else +static inline bool bpf_sock_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} +static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} +#endif + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d0e7f97e8b60..a411363098a5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -58,6 +58,8 @@ struct bpf_reg_state { * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we * came from, when one is tested for != NULL. + * For PTR_TO_SOCKET this is used to share which pointers retain the + * same reference to the socket, to determine proper reference freeing. */ u32 id; /* For scalar types (SCALAR_VALUE), this represents our knowledge of -- cgit v1.2.3 From fd978bf7fd312581a7ca454a991f0ffb34c4204b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:35 -0700 Subject: bpf: Add reference tracking to verifier Allow helper functions to acquire a reference and return it into a register. Specific pointer types such as the PTR_TO_SOCKET will implicitly represent such a reference. The verifier must ensure that these references are released exactly once in each path through the program. To achieve this, this commit assigns an id to the pointer and tracks it in the 'bpf_func_state', then when the function or program exits, verifies that all of the acquired references have been freed. When the pointer is passed to a function that frees the reference, it is removed from the 'bpf_func_state` and all existing copies of the pointer in registers are marked invalid. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a411363098a5..7b6fd2ab3263 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -104,6 +104,17 @@ struct bpf_stack_state { u8 slot_type[BPF_REG_SIZE]; }; +struct bpf_reference_state { + /* Track each reference created with a unique id, even if the same + * instruction creates the reference multiple times (eg, via CALL). + */ + int id; + /* Instruction where the allocation of this reference occurred. This + * is used purely to inform the user of a reference leak. + */ + int insn_idx; +}; + /* state of the program: * type of all registers and stack info */ @@ -121,7 +132,9 @@ struct bpf_func_state { */ u32 subprogno; - /* should be second to last. See copy_func_state() */ + /* The following fields should be last. See copy_func_state() */ + int acquired_refs; + struct bpf_reference_state *refs; int allocated_stack; struct bpf_stack_state *stack; }; @@ -217,11 +230,16 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); -static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; - return cur->frame[cur->curframe]->regs; + return cur->frame[cur->curframe]; +} + +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +{ + return cur_func(env)->regs; } int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); -- cgit v1.2.3 From 6acc9b432e6714d72d7d77ec7c27f6f8358d0c71 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:36 -0700 Subject: bpf: Add helper to retrieve socket in BPF This patch adds new BPF helper functions, bpf_sk_lookup_tcp() and bpf_sk_lookup_udp() which allows BPF programs to find out if there is a socket listening on this host, and returns a socket pointer which the BPF program can then access to determine, for instance, whether to forward or drop traffic. bpf_sk_lookup_xxx() may take a reference on the socket, so when a BPF program makes use of this function, it must subsequently pass the returned pointer into the newly added sk_release() to return the reference. By way of example, the following pseudocode would filter inbound connections at XDP if there is no corresponding service listening for the traffic: struct bpf_sock_tuple tuple; struct bpf_sock_ops *sk; populate_tuple(ctx, &tuple); // Extract the 5tuple from the packet sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof tuple, netns, 0); if (!sk) { // Couldn't find a socket listening for this traffic. Drop. return TC_ACT_SHOT; } bpf_sk_release(sk, 0); return TC_ACT_OK; Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 93 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e2070d819e04..f9187b41dff6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2144,6 +2144,77 @@ union bpf_attr { * request in the skb. * Return * 0 on success, or a negative error in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * int bpf_sk_release(struct bpf_sock *sk) + * Description + * Release the reference held by *sock*. *sock* must be a non-NULL + * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2229,7 +2300,10 @@ union bpf_attr { FN(get_current_cgroup_id), \ FN(get_local_storage), \ FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), + FN(skb_ancestor_cgroup_id), \ + FN(sk_lookup_tcp), \ + FN(sk_lookup_udp), \ + FN(sk_release), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2399,6 +2473,23 @@ struct bpf_sock { */ }; +struct bpf_sock_tuple { + union { + struct { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + } ipv4; + struct { + __be32 saddr[4]; + __be32 daddr[4]; + __be16 sport; + __be16 dport; + } ipv6; + }; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. -- cgit v1.2.3 From 8873c064d1de579ea23412a6d3eee972593f142b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Oct 2018 23:24:26 -0700 Subject: tcp: do not release socket ownership in tcp_close() syzkaller was able to hit the WARN_ON(sock_owned_by_user(sk)); in tcp_close() While a socket is being closed, it is very possible other threads find it in rtnetlink dump. tcp_get_info() will acquire the socket lock for a short amount of time (slow = lock_sock_fast(sk)/unlock_sock_fast(sk, slow);), enough to trigger the warning. Fixes: 67db3e4bfbc9 ("tcp: no longer hold ehash lock while calling tcp_get_info()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 38cae35f6e16..751549ac0a84 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1492,6 +1492,7 @@ static inline void lock_sock(struct sock *sk) lock_sock_nested(sk, 0); } +void __release_sock(struct sock *sk); void release_sock(struct sock *sk); /* BH context may only use the following locking interface. */ -- cgit v1.2.3 From f3edc2dbe0ad0bbbd8450cd37328f99acf215fd8 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 1 Oct 2018 17:02:43 +0100 Subject: net: usbnet: make driver_info const The driver_info field that is used for describing each of the usb-net drivers using the usbnet.c core all declare their information as const and the usbnet.c itself does not try and modify the struct. It is therefore a good idea to make this const in the usbnet.c structure in case anyone tries to modify it. Signed-off-by: Ben Dooks Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index e2ec3582e549..d8860f2d0976 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -28,7 +28,7 @@ struct usbnet { /* housekeeping */ struct usb_device *udev; struct usb_interface *intf; - struct driver_info *driver_info; + const struct driver_info *driver_info; const char *driver_name; void *driver_priv; wait_queue_head_t wait; -- cgit v1.2.3 From 4e6d47206c32d1bbb4931f1d851dae3870e0df81 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Sun, 30 Sep 2018 08:04:35 +0530 Subject: tls: Add support for inplace records encryption Presently, for non-zero copy case, separate pages are allocated for storing plaintext and encrypted text of records. These pages are stored in sg_plaintext_data and sg_encrypted_data scatterlists inside record structure. Further, sg_plaintext_data & sg_encrypted_data are passed to cryptoapis for record encryption. Allocating separate pages for plaintext and encrypted text is inefficient from both required memory and performance point of view. This patch adds support of inplace encryption of records. For non-zero copy case, we reuse the pages from sg_encrypted_data scatterlist to copy the application's plaintext data. For the movement of pages from sg_encrypted_data to sg_plaintext_data scatterlists, we introduce a new function move_to_plaintext_sg(). This function add pages into sg_plaintext_data from sg_encrypted_data scatterlists. tls_do_encryption() is modified to pass the same scatterlist as both source and destination into aead_request_set_crypt() if inplace crypto has been enabled. A new ariable 'inplace_crypto' has been introduced in record structure to signify whether the same scatterlist can be used. By default, the inplace_crypto is enabled in get_rec(). If zero-copy is used (i.e. plaintext data is not copied), inplace_crypto is set to '0'. Signed-off-by: Vakul Garg Reviewed-by: Dave Watson Signed-off-by: David S. Miller --- include/net/tls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 262420cdad10..5e853835597e 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -101,6 +101,7 @@ struct tls_rec { struct list_head list; int tx_ready; int tx_flags; + int inplace_crypto; /* AAD | sg_plaintext_data | sg_tag */ struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS + 1]; -- cgit v1.2.3 From e9837e55b0200da544a095a1fca36efd7fd3ba30 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Mon, 1 Oct 2018 18:23:08 -0700 Subject: netfilter: xt_quota: fix the behavior of xt_quota module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A major flaw of the current xt_quota module is that quota in a specific rule gets reset every time there is a rule change in the same table. It makes the xt_quota module not very useful in a table in which iptables rules are changed at run time. This fix introduces a new counter that is visible to userspace as the remaining quota of the current rule. When userspace restores the rules in a table, it can restore the counter to the remaining quota instead of resetting it to the full quota. Signed-off-by: Chenbo Feng Suggested-by: Maciej Żenczykowski Reviewed-by: Maciej Żenczykowski Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_quota.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_quota.h b/include/uapi/linux/netfilter/xt_quota.h index f3ba5d9e58b6..d72fd52adbba 100644 --- a/include/uapi/linux/netfilter/xt_quota.h +++ b/include/uapi/linux/netfilter/xt_quota.h @@ -15,9 +15,11 @@ struct xt_quota_info { __u32 flags; __u32 pad; __aligned_u64 quota; - - /* Used internally by the kernel */ - struct xt_quota_priv *master; +#ifdef __KERNEL__ + atomic64_t counter; +#else + __aligned_u64 remain; +#endif }; #endif /* _XT_QUOTA_H */ -- cgit v1.2.3 From 16fc087b9cb22c9a97307cc24a5413d0df68fe11 Mon Sep 17 00:00:00 2001 From: Yashaswini Raghuram Prathivadi Bhayankaram Date: Fri, 10 Aug 2018 00:17:44 -0700 Subject: virtchnl: Added support to exchange additional speed values Introduced a new virtchnl capability flag and a struct to support exchange of additional supported speeds. Signed-off-by: Yashaswini Raghuram Prathivadi Bhayankaram Signed-off-by: Anirudh Venkataramanan Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index b41f7bc958ef..2c9756bd9c4c 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -252,6 +252,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 #define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 +/* Define below the capability flags that are not offloads */ +#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ VIRTCHNL_VF_OFFLOAD_RSS_PF) @@ -596,10 +598,23 @@ enum virtchnl_event_codes { struct virtchnl_pf_event { enum virtchnl_event_codes event; union { + /* If the PF driver does not support the new speed reporting + * capabilities then use link_event else use link_event_adv to + * get the speed and link information. The ability to understand + * new speeds is indicated by setting the capability flag + * VIRTCHNL_VF_CAP_ADV_LINK_SPEED in vf_cap_flags parameter + * in virtchnl_vf_resource struct and can be used to determine + * which link event struct to use below. + */ struct { enum virtchnl_link_speed link_speed; bool link_status; } link_event; + struct { + /* link_speed provided in Mbps */ + u32 link_speed; + u8 link_status; + } link_event_adv; } event_data; int severity; -- cgit v1.2.3 From db7ff19e7b119adb4618fbc6410b441d1c3b55c5 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 15 Aug 2018 16:02:18 +0300 Subject: devlink: Add extack for eswitch operations Add extack argument to the eswitch related operations. Signed-off-by: Eli Britstein Reviewed-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/net/devlink.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index b9b89d6604d4..70671f0d4c30 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -451,11 +451,14 @@ struct devlink_ops { u32 *p_cur, u32 *p_max); int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); - int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack); int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); - int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); + int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode, + struct netlink_ext_ack *extack); int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); - int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) -- cgit v1.2.3 From fcd29ad17c6ff885dfae58f557e9323941e63ba2 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Thu, 9 Aug 2018 09:55:21 +0300 Subject: net/mlx5: Add Fast teardown support Today mlx5 devices support two teardown modes: 1- Regular teardown 2- Force teardown This change introduces the enhanced version of the "Force teardown" that allows SW to perform teardown in a faster way without the need to reclaim all the pages. Fast teardown provides the following advantages: 1- Fix a FW race condition that could cause command timeout 2- Avoid moving to polling mode 3- Close the vport to prevent PCI ACK to be sent without been scatter to memory Signed-off-by: Feras Daoud Reviewed-by: Majd Dibbiny Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 11fa4e66afc5..e9b502d5bcc1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -504,6 +504,10 @@ struct health_buffer { __be16 ext_synd; }; +enum mlx5_cmd_addr_l_sz_offset { + MLX5_NIC_IFC_OFFSET = 8, +}; + struct mlx5_init_seg { __be32 fw_rev; __be32 cmdif_rev_fw_sub; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f043d65b9bac..6e8a882052b1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -896,7 +896,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_mkey[0x6]; u8 reserved_at_f0[0x8]; u8 dump_fill_mkey[0x1]; - u8 reserved_at_f9[0x3]; + u8 reserved_at_f9[0x2]; + u8 fast_teardown[0x1]; u8 log_max_eq[0x4]; u8 max_indirection[0x8]; @@ -3352,12 +3353,13 @@ struct mlx5_ifc_teardown_hca_out_bits { u8 reserved_at_40[0x3f]; - u8 force_state[0x1]; + u8 state[0x1]; }; enum { MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE = 0x1, + MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN = 0x2, }; struct mlx5_ifc_teardown_hca_in_bits { -- cgit v1.2.3 From 2070a3e44962212d6ef02c5def821b1b9744e496 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:42:29 +0100 Subject: rxrpc: Allow the reply time to be obtained on a client call Allow the timestamp on the sk_buff holding the first DATA packet of a reply to be queried. This can then be used as a base for the expiry time calculation on the callback promise duration indicated by an operation result. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f53edb3754bc..c4c912554dee 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -13,6 +13,7 @@ #define _NET_RXRPC_H #include +#include struct key; struct sock; @@ -77,5 +78,7 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); +bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, + ktime_t *); #endif /* _NET_RXRPC_H */ -- cgit v1.2.3 From e908bcf4f1a271e7c264dcbffc5881ced8bfacee Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:54:29 +0100 Subject: rxrpc: Allow the reply time to be obtained on a client call Allow the epoch value to be queried on a server connection. This is in the rxrpc header of every packet for use in routing and is derived from the client's state. It's also not supposed to change unless the client gets restarted. AFS can make use of this information to deduce whether a fileserver has been restarted because the fileserver makes client calls to the filesystem driver's cache manager to send notifications (ie. callback breaks) about conflicting changes from other clients. These convey the fileserver's own epoch value back to the filesystem. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index c4c912554dee..de587948042a 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -78,6 +78,7 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); +u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); -- cgit v1.2.3 From bbb4c4323a4d9cb5ca04db904aa3050a7586839a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 14:27:55 +0100 Subject: dns: Allow the dns resolver to retrieve a server set Allow the DNS resolver to retrieve a set of servers and their associated addresses, ports, preference and weight ratings. In terms of communication with userspace, "srv=1" is added to the callout string (the '1' indicating the maximum data version supported by the kernel) to ask the userspace side for this. If the userspace side doesn't recognise it, it will ignore the option and return the usual text address list. If the userspace side does recognise it, it will return some binary data that begins with a zero byte that would cause the string parsers to give an error. The second byte contains the version of the data in the blob (this may be between 1 and the version specified in the callout data). The remainder of the payload is version-specific. In version 1, the payload looks like (note that this is packed): u8 Non-string marker (ie. 0) u8 Content (0 => Server list) u8 Version (ie. 1) u8 Source (eg. DNS_RECORD_FROM_DNS_SRV) u8 Status (eg. DNS_LOOKUP_GOOD) u8 Number of servers foreach-server { u16 Name length (LE) u16 Priority (as per SRV record) (LE) u16 Weight (as per SRV record) (LE) u16 Port (LE) u8 Source (eg. DNS_RECORD_FROM_NSS) u8 Status (eg. DNS_LOOKUP_GOT_NOT_FOUND) u8 Protocol (eg. DNS_SERVER_PROTOCOL_UDP) u8 Number of addresses char[] Name (not NUL-terminated) foreach-address { u8 Family (AF_INET{,6}) union { u8[4] ipv4_addr u8[16] ipv6_addr } } } This can then be used to fetch a whole cell's VL-server configuration for AFS, for example. Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/linux/dns_resolver.h | 4 +- include/uapi/linux/dns_resolver.h | 116 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 include/uapi/linux/dns_resolver.h (limited to 'include') diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h index 6ac3cad9aef1..34a744a1bafc 100644 --- a/include/linux/dns_resolver.h +++ b/include/linux/dns_resolver.h @@ -24,11 +24,9 @@ #ifndef _LINUX_DNS_RESOLVER_H #define _LINUX_DNS_RESOLVER_H -#ifdef __KERNEL__ +#include extern int dns_query(const char *type, const char *name, size_t namelen, const char *options, char **_result, time64_t *_expiry); -#endif /* KERNEL */ - #endif /* _LINUX_DNS_RESOLVER_H */ diff --git a/include/uapi/linux/dns_resolver.h b/include/uapi/linux/dns_resolver.h new file mode 100644 index 000000000000..129745f9c794 --- /dev/null +++ b/include/uapi/linux/dns_resolver.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* DNS resolver interface definitions. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_DNS_RESOLVER_H +#define _UAPI_LINUX_DNS_RESOLVER_H + +#include + +/* + * Type of payload. + */ +enum dns_payload_content_type { + DNS_PAYLOAD_IS_SERVER_LIST = 0, /* List of servers, requested by srv=1 */ +}; + +/* + * Type of address that might be found in an address record. + */ +enum dns_payload_address_type { + DNS_ADDRESS_IS_IPV4 = 0, /* 4-byte AF_INET address */ + DNS_ADDRESS_IS_IPV6 = 1, /* 16-byte AF_INET6 address */ +}; + +/* + * Type of protocol used to access a server. + */ +enum dns_payload_protocol_type { + DNS_SERVER_PROTOCOL_UNSPECIFIED = 0, + DNS_SERVER_PROTOCOL_UDP = 1, /* Use UDP to talk to the server */ + DNS_SERVER_PROTOCOL_TCP = 2, /* Use TCP to talk to the server */ +}; + +/* + * Source of record included in DNS resolver payload. + */ +enum dns_record_source { + DNS_RECORD_UNAVAILABLE = 0, /* No source available (empty record) */ + DNS_RECORD_FROM_CONFIG = 1, /* From local configuration data */ + DNS_RECORD_FROM_DNS_A = 2, /* From DNS A or AAAA record */ + DNS_RECORD_FROM_DNS_AFSDB = 3, /* From DNS AFSDB record */ + DNS_RECORD_FROM_DNS_SRV = 4, /* From DNS SRV record */ + DNS_RECORD_FROM_NSS = 5, /* From NSS */ + NR__dns_record_source +}; + +/* + * Status of record included in DNS resolver payload. + */ +enum dns_lookup_status { + DNS_LOOKUP_NOT_DONE = 0, /* No lookup has been made */ + DNS_LOOKUP_GOOD = 1, /* Good records obtained */ + DNS_LOOKUP_GOOD_WITH_BAD = 2, /* Good records, some decoding errors */ + DNS_LOOKUP_BAD = 3, /* Couldn't decode results */ + DNS_LOOKUP_GOT_NOT_FOUND = 4, /* Got a "Not Found" result */ + DNS_LOOKUP_GOT_LOCAL_FAILURE = 5, /* Local failure during lookup */ + DNS_LOOKUP_GOT_TEMP_FAILURE = 6, /* Temporary failure during lookup */ + DNS_LOOKUP_GOT_NS_FAILURE = 7, /* Name server failure */ + NR__dns_lookup_status +}; + +/* + * Header at the beginning of binary format payload. + */ +struct dns_payload_header { + __u8 zero; /* Zero byte: marks this as not being text */ + __u8 content; /* enum dns_payload_content_type */ + __u8 version; /* Encoding version */ +} __packed; + +/* + * Header at the beginning of a V1 server list. This is followed directly by + * the server records. Each server records begins with a struct of type + * dns_server_list_v1_server. + */ +struct dns_server_list_v1_header { + struct dns_payload_header hdr; + __u8 source; /* enum dns_record_source */ + __u8 status; /* enum dns_lookup_status */ + __u8 nr_servers; /* Number of server records following this */ +} __packed; + +/* + * Header at the beginning of each V1 server record. This is followed by the + * characters of the name with no NUL-terminator, followed by the address + * records for that server. Each address record begins with a struct of type + * struct dns_server_list_v1_address. + */ +struct dns_server_list_v1_server { + __u16 name_len; /* Length of name (LE) */ + __u16 priority; /* Priority (as SRV record) (LE) */ + __u16 weight; /* Weight (as SRV record) (LE) */ + __u16 port; /* UDP/TCP port number (LE) */ + __u8 source; /* enum dns_record_source */ + __u8 status; /* enum dns_lookup_status */ + __u8 protocol; /* enum dns_payload_protocol_type */ + __u8 nr_addrs; +} __packed; + +/* + * Header at the beginning of each V1 address record. This is followed by the + * bytes of the address, 4 for IPV4 and 16 for IPV6. + */ +struct dns_server_list_v1_address { + __u8 address_type; /* enum dns_payload_address_type */ +} __packed; + +#endif /* _UAPI_LINUX_DNS_RESOLVER_H */ -- cgit v1.2.3 From e3b5106162a3f73c7633ae6051fbf244584ab584 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 4 Oct 2018 11:13:44 +0530 Subject: devlink: Add generic parameter ignore_ari ignore_ari - Device ignores ARI(Alternate Routing ID) capability, even when platforms has the support and creates same number of partitions when platform does not support ARI capability. Cc: Jiri Pirko Cc: Michael Chan Signed-off-by: Vasundhara Volam Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 70671f0d4c30..ae28ccbd6843 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -362,6 +362,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_MAX_MACS, DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, + DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -380,6 +381,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable" #define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME "ignore_ari" +#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From f61cba4291c06c201b1b855a341b036caefdc2d6 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 4 Oct 2018 11:13:45 +0530 Subject: devlink: Add generic parameter msix_vec_per_pf_max msix_vec_per_pf_max - This param sets the number of MSIX vectors that the device requests from the host on driver initialization. This value is set in the device which is applicable per PF. Cc: Jiri Pirko Cc: Michael Chan Signed-off-by: Vasundhara Volam Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index ae28ccbd6843..c9b08b49957c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -363,6 +363,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -384,6 +385,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME "ignore_ari" #define DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME "msix_vec_per_pf_max" +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From 16511789b9cc0a946611b1f9575b7a5b2b566301 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 4 Oct 2018 11:13:46 +0530 Subject: devlink: Add generic parameter msix_vec_per_pf_min msix_vec_per_pf_min - This param sets the number of minimal MSIX vectors required for the device initialization. This value is set in the device which limits MSIX vectors per PF. Cc: Jiri Pirko Cc: Michael Chan Signed-off-by: Vasundhara Volam Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index c9b08b49957c..9a70755ad1c2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -364,6 +364,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -388,6 +389,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME "msix_vec_per_pf_max" #define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE DEVLINK_PARAM_TYPE_U32 +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME "msix_vec_per_pf_min" +#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From 5a781ccbd19e4664babcbe4b4ead7aa2b9283d22 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 28 Sep 2018 17:59:43 -0700 Subject: tc: Add support for configuring the taprio scheduler This traffic scheduler allows traffic classes states (transmission allowed/not allowed, in the simplest case) to be scheduled, according to a pre-generated time sequence. This is the basis of the IEEE 802.1Qbv specification. Example configuration: tc qdisc replace dev enp3s0 parent root handle 100 taprio \ num_tc 3 \ map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \ queues 1@0 1@1 2@2 \ base-time 1528743495910289987 \ sched-entry S 01 300000 \ sched-entry S 02 300000 \ sched-entry S 04 300000 \ clockid CLOCK_TAI The configuration format is similar to mqprio. The main difference is the presence of a schedule, built by multiple "sched-entry" definitions, each entry has the following format: sched-entry The only supported is "S", which means "SetGateStates", following the IEEE 802.1Qbv-2015 definition (Table 8-6). is a bitmask where each bit is a associated with a traffic class, so bit 0 (the least significant bit) being "on" means that traffic class 0 is "active" for that schedule entry. is a time duration in nanoseconds that specifies for how long that state defined by and should be held before moving to the next entry. This schedule is circular, that is, after the last entry is executed it starts from the first one, indefinitely. The other parameters can be defined as follows: - base-time: specifies the instant when the schedule starts, if 'base-time' is a time in the past, the schedule will start at base-time + (N * cycle-time) where N is the smallest integer so the resulting time is greater than "now", and "cycle-time" is the sum of all the intervals of the entries in the schedule; - clockid: specifies the reference clock to be used; The parameters should be similar to what the IEEE 802.1Q family of specification defines. Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index e9b7244ac381..89ee47c2f17d 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1084,4 +1084,50 @@ enum { CAKE_ATM_MAX }; + +/* TAPRIO */ +enum { + TC_TAPRIO_CMD_SET_GATES = 0x00, + TC_TAPRIO_CMD_SET_AND_HOLD = 0x01, + TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02, +}; + +enum { + TCA_TAPRIO_SCHED_ENTRY_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */ + TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */ + __TCA_TAPRIO_SCHED_ENTRY_MAX, +}; +#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1) + +/* The format for schedule entry list is: + * [TCA_TAPRIO_SCHED_ENTRY_LIST] + * [TCA_TAPRIO_SCHED_ENTRY] + * [TCA_TAPRIO_SCHED_ENTRY_CMD] + * [TCA_TAPRIO_SCHED_ENTRY_GATES] + * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] + */ +enum { + TCA_TAPRIO_SCHED_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY, + __TCA_TAPRIO_SCHED_MAX, +}; + +#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1) + +enum { + TCA_TAPRIO_ATTR_UNSPEC, + TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ + TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */ + TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */ + TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */ + TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */ + TCA_TAPRIO_PAD, + __TCA_TAPRIO_ATTR_MAX, +}; + +#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1) + #endif -- cgit v1.2.3 From d26d4b194e582c6f2070cc5f7f74a72124ad41ef Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 4 Oct 2018 17:07:51 -0700 Subject: net: sched: remove unused helpers tcf_block_dev() doesn't seem to be used anywhere in the tree. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bbfe27f86d5f..72ffb3120ced 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -65,11 +65,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) return block->q; } -static inline struct net_device *tcf_block_dev(struct tcf_block *block) -{ - return tcf_block_q(block)->dev_queue->dev; -} - void *tcf_block_cb_priv(struct tcf_block_cb *block_cb); struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_ident); @@ -122,11 +117,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) return NULL; } -static inline struct net_device *tcf_block_dev(struct tcf_block *block) -{ - return NULL; -} - static inline int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_priv) -- cgit v1.2.3 From 767a2217533fed696af0d06bee7746d34c4e00aa Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 4 Oct 2018 20:07:51 -0700 Subject: net: common metrics init helper for FIB entries Consolidate initialization of ipv4 and ipv6 metrics when fib entries are created into a single helper, ip_fib_metrics_init, that handles the call to ip_metrics_convert. If no metrics are defined for the fib entry, then the metrics is set to dst_default_metrics. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index e44b1a44f67a..8cbe7e8c9e1e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -420,8 +420,8 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } -int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, - u32 *metrics); +struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, + int fc_mx_len); u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); -- cgit v1.2.3 From cc5f0eb2164f9aa11fe631f8d905192e0233e262 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 4 Oct 2018 20:07:52 -0700 Subject: net: Move free of fib_metrics to helper Move the refcounting and potential free of dst metrics associated with a fib entry to a helper and use it in both ipv4 and ipv6. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 8cbe7e8c9e1e..8fdd58ce580d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -422,6 +422,12 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, int fc_mx_len); +static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) +{ + if (fib_metrics != &dst_default_metrics && + refcount_dec_and_test(&fib_metrics->refcnt)) + kfree(fib_metrics); +} u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); -- cgit v1.2.3 From e1255ed4b6dafd9966c99cde5105891cc1ac70df Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 4 Oct 2018 20:07:53 -0700 Subject: net: common metrics init helper for dst_entry ipv4 and ipv6 both use refcounted metrics if FIB entries have metrics set. Move the common initialization code to a helper and use for both protocols. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 8fdd58ce580d..f9a7125b4bda 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -429,6 +429,18 @@ static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) kfree(fib_metrics); } +/* ipv4 and ipv6 both use refcounted metrics if it is not the default */ +static inline +void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics) +{ + dst_init_metrics(dst, fib_metrics->metrics, true); + + if (fib_metrics != &dst_default_metrics) { + dst->_metrics |= DST_METRICS_REFCOUNTED; + refcount_inc(&fib_metrics->refcnt); + } +} + u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); -- cgit v1.2.3 From 1620a33695d81611360d813a47ebde9386714036 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 4 Oct 2018 20:07:54 -0700 Subject: net: Move free of dst_metrics to helper Move the refcounting and potential free of dst metrics associated for ipv4 and ipv6 to a common helper. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index f9a7125b4bda..72593e171d14 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -441,6 +441,15 @@ void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics) } } +static inline +void ip_dst_metrics_put(struct dst_entry *dst) +{ + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); + + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) + kfree(p); +} + u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); -- cgit v1.2.3 From 661b8d1b0e3a745e25f05adef2ebd00d830eeea7 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 1 Oct 2018 14:51:33 +0200 Subject: net: add umem reference in netdev{_rx}_queue These references to the umem will be used to store information on what kind of AF_XDP umem that is bound to a queue id, if any. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1cbbf77a685f..8318f79586c2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -609,6 +609,9 @@ struct netdev_queue { /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif /* * write-mostly part */ @@ -738,6 +741,9 @@ struct netdev_rx_queue { struct kobject kobj; struct net_device *dev; struct xdp_rxq_info xdp_rxq; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif } ____cacheline_aligned_in_smp; /* -- cgit v1.2.3 From 1661d346628115c364e2b7d5b15a64ca3bd0dbd4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Oct 2018 14:51:36 +0200 Subject: ethtool: don't allow disabling queues with umem installed We already check the RSS indirection table does not use queues which would be disabled by channel reconfiguration. Make sure user does not try to disable queues which have a UMEM and zero-copy AF_XDP socket installed. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 70a115bea4f4..13acb9803a6d 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -86,6 +86,7 @@ struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries); struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, struct xdp_umem_fq_reuse *newq); void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq); +struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id); static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { @@ -183,6 +184,12 @@ static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq) { } +static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, + u16 queue_id) +{ + return NULL; +} + static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { return NULL; -- cgit v1.2.3 From 95278ddaa15cfa23e4a06ee9ed7b6ee0197c500b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 2 Oct 2018 12:50:19 -0700 Subject: net_sched: convert idrinfo->lock from spinlock to a mutex In commit ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock") we move fl_hw_destroy_tmplt() to a workqueue to avoid blocking with the spinlock held. Unfortunately, this causes a lot of troubles here: 1. tcf_chain_destroy() could be called right after we queue the work but before the work runs. This is a use-after-free. 2. The chain refcnt is already 0, we can't even just hold it again. We can check refcnt==1 but it is ugly. 3. The chain with refcnt 0 is still visible in its block, which means it could be still found and used! 4. The block has a refcnt too, we can't hold it without introducing a proper API either. We can make it working but the end result is ugly. Instead of wasting time on reviewing it, let's just convert the troubling spinlock to a mutex, which allows us to use non-atomic allocations too. Fixes: ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock") Reported-by: Ido Schimmel Cc: Jamal Hadi Salim Cc: Vlad Buslov Cc: Jiri Pirko Signed-off-by: Cong Wang Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 1ddff3360592..05c7df41d737 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -13,7 +13,7 @@ #include struct tcf_idrinfo { - spinlock_t lock; + struct mutex lock; struct idr action_idr; }; @@ -117,7 +117,7 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - spin_lock_init(&tn->idrinfo->lock); + mutex_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; } -- cgit v1.2.3 From f2e9de210d50187d206989e60bc5a99c2b692209 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 5 Oct 2018 11:31:40 -0400 Subject: udp: gro behind static key Avoid the socket lookup cost in udp_gro_receive if no socket has a udp tunnel callback configured. udp_sk(sk)->gro_receive requires a registration with setup_udp_tunnel_sock, which enables the static key. Signed-off-by: Willem de Bruijn Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 8482a990b0bb..9e82cb391dea 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -443,8 +443,10 @@ int udpv4_offload_init(void); void udp_init(void); +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void); #if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif -- cgit v1.2.3 From 8afc978925ba0a0e8c7f76ebd0d764de0da2c0e9 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Thu, 4 Oct 2018 14:22:01 +0200 Subject: net: mscc: ocelot: move the HSIO header to include/soc Since HSIO address space can be used by different drivers (PLL, SerDes muxing, temperature sensor), let's move it somewhere it can be included by all drivers. Reviewed-by: Florian Fainelli Acked-by: Alexandre Belloni Signed-off-by: Quentin Schulz Signed-off-by: David S. Miller --- include/soc/mscc/ocelot_hsio.h | 785 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 785 insertions(+) create mode 100644 include/soc/mscc/ocelot_hsio.h (limited to 'include') diff --git a/include/soc/mscc/ocelot_hsio.h b/include/soc/mscc/ocelot_hsio.h new file mode 100644 index 000000000000..d93ddec3931b --- /dev/null +++ b/include/soc/mscc/ocelot_hsio.h @@ -0,0 +1,785 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Microsemi Ocelot Switch driver + * + * Copyright (c) 2017 Microsemi Corporation + */ + +#ifndef _MSCC_OCELOT_HSIO_H_ +#define _MSCC_OCELOT_HSIO_H_ + +#define HSIO_PLL5G_CFG0_ENA_ROT BIT(31) +#define HSIO_PLL5G_CFG0_ENA_LANE BIT(30) +#define HSIO_PLL5G_CFG0_ENA_CLKTREE BIT(29) +#define HSIO_PLL5G_CFG0_DIV4 BIT(28) +#define HSIO_PLL5G_CFG0_ENA_LOCK_FINE BIT(27) +#define HSIO_PLL5G_CFG0_SELBGV820(x) (((x) << 23) & GENMASK(26, 23)) +#define HSIO_PLL5G_CFG0_SELBGV820_M GENMASK(26, 23) +#define HSIO_PLL5G_CFG0_SELBGV820_X(x) (((x) & GENMASK(26, 23)) >> 23) +#define HSIO_PLL5G_CFG0_LOOP_BW_RES(x) (((x) << 18) & GENMASK(22, 18)) +#define HSIO_PLL5G_CFG0_LOOP_BW_RES_M GENMASK(22, 18) +#define HSIO_PLL5G_CFG0_LOOP_BW_RES_X(x) (((x) & GENMASK(22, 18)) >> 18) +#define HSIO_PLL5G_CFG0_SELCPI(x) (((x) << 16) & GENMASK(17, 16)) +#define HSIO_PLL5G_CFG0_SELCPI_M GENMASK(17, 16) +#define HSIO_PLL5G_CFG0_SELCPI_X(x) (((x) & GENMASK(17, 16)) >> 16) +#define HSIO_PLL5G_CFG0_ENA_VCO_CONTRH BIT(15) +#define HSIO_PLL5G_CFG0_ENA_CP1 BIT(14) +#define HSIO_PLL5G_CFG0_ENA_VCO_BUF BIT(13) +#define HSIO_PLL5G_CFG0_ENA_BIAS BIT(12) +#define HSIO_PLL5G_CFG0_CPU_CLK_DIV(x) (((x) << 6) & GENMASK(11, 6)) +#define HSIO_PLL5G_CFG0_CPU_CLK_DIV_M GENMASK(11, 6) +#define HSIO_PLL5G_CFG0_CPU_CLK_DIV_X(x) (((x) & GENMASK(11, 6)) >> 6) +#define HSIO_PLL5G_CFG0_CORE_CLK_DIV(x) ((x) & GENMASK(5, 0)) +#define HSIO_PLL5G_CFG0_CORE_CLK_DIV_M GENMASK(5, 0) + +#define HSIO_PLL5G_CFG1_ENA_DIRECT BIT(18) +#define HSIO_PLL5G_CFG1_ROT_SPEED BIT(17) +#define HSIO_PLL5G_CFG1_ROT_DIR BIT(16) +#define HSIO_PLL5G_CFG1_READBACK_DATA_SEL BIT(15) +#define HSIO_PLL5G_CFG1_RC_ENABLE BIT(14) +#define HSIO_PLL5G_CFG1_RC_CTRL_DATA(x) (((x) << 6) & GENMASK(13, 6)) +#define HSIO_PLL5G_CFG1_RC_CTRL_DATA_M GENMASK(13, 6) +#define HSIO_PLL5G_CFG1_RC_CTRL_DATA_X(x) (((x) & GENMASK(13, 6)) >> 6) +#define HSIO_PLL5G_CFG1_QUARTER_RATE BIT(5) +#define HSIO_PLL5G_CFG1_PWD_TX BIT(4) +#define HSIO_PLL5G_CFG1_PWD_RX BIT(3) +#define HSIO_PLL5G_CFG1_OUT_OF_RANGE_RECAL_ENA BIT(2) +#define HSIO_PLL5G_CFG1_HALF_RATE BIT(1) +#define HSIO_PLL5G_CFG1_FORCE_SET_ENA BIT(0) + +#define HSIO_PLL5G_CFG2_ENA_TEST_MODE BIT(30) +#define HSIO_PLL5G_CFG2_ENA_PFD_IN_FLIP BIT(29) +#define HSIO_PLL5G_CFG2_ENA_VCO_NREF_TESTOUT BIT(28) +#define HSIO_PLL5G_CFG2_ENA_FBTESTOUT BIT(27) +#define HSIO_PLL5G_CFG2_ENA_RCPLL BIT(26) +#define HSIO_PLL5G_CFG2_ENA_CP2 BIT(25) +#define HSIO_PLL5G_CFG2_ENA_CLK_BYPASS1 BIT(24) +#define HSIO_PLL5G_CFG2_AMPC_SEL(x) (((x) << 16) & GENMASK(23, 16)) +#define HSIO_PLL5G_CFG2_AMPC_SEL_M GENMASK(23, 16) +#define HSIO_PLL5G_CFG2_AMPC_SEL_X(x) (((x) & GENMASK(23, 16)) >> 16) +#define HSIO_PLL5G_CFG2_ENA_CLK_BYPASS BIT(15) +#define HSIO_PLL5G_CFG2_PWD_AMPCTRL_N BIT(14) +#define HSIO_PLL5G_CFG2_ENA_AMPCTRL BIT(13) +#define HSIO_PLL5G_CFG2_ENA_AMP_CTRL_FORCE BIT(12) +#define HSIO_PLL5G_CFG2_FRC_FSM_POR BIT(11) +#define HSIO_PLL5G_CFG2_DISABLE_FSM_POR BIT(10) +#define HSIO_PLL5G_CFG2_GAIN_TEST(x) (((x) << 5) & GENMASK(9, 5)) +#define HSIO_PLL5G_CFG2_GAIN_TEST_M GENMASK(9, 5) +#define HSIO_PLL5G_CFG2_GAIN_TEST_X(x) (((x) & GENMASK(9, 5)) >> 5) +#define HSIO_PLL5G_CFG2_EN_RESET_OVERRUN BIT(4) +#define HSIO_PLL5G_CFG2_EN_RESET_LIM_DET BIT(3) +#define HSIO_PLL5G_CFG2_EN_RESET_FRQ_DET BIT(2) +#define HSIO_PLL5G_CFG2_DISABLE_FSM BIT(1) +#define HSIO_PLL5G_CFG2_ENA_GAIN_TEST BIT(0) + +#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL(x) (((x) << 22) & GENMASK(23, 22)) +#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL_M GENMASK(23, 22) +#define HSIO_PLL5G_CFG3_TEST_ANA_OUT_SEL_X(x) (((x) & GENMASK(23, 22)) >> 22) +#define HSIO_PLL5G_CFG3_TESTOUT_SEL(x) (((x) << 19) & GENMASK(21, 19)) +#define HSIO_PLL5G_CFG3_TESTOUT_SEL_M GENMASK(21, 19) +#define HSIO_PLL5G_CFG3_TESTOUT_SEL_X(x) (((x) & GENMASK(21, 19)) >> 19) +#define HSIO_PLL5G_CFG3_ENA_ANA_TEST_OUT BIT(18) +#define HSIO_PLL5G_CFG3_ENA_TEST_OUT BIT(17) +#define HSIO_PLL5G_CFG3_SEL_FBDCLK BIT(16) +#define HSIO_PLL5G_CFG3_SEL_CML_CMOS_PFD BIT(15) +#define HSIO_PLL5G_CFG3_RST_FB_N BIT(14) +#define HSIO_PLL5G_CFG3_FORCE_VCO_CONTRH BIT(13) +#define HSIO_PLL5G_CFG3_FORCE_LO BIT(12) +#define HSIO_PLL5G_CFG3_FORCE_HI BIT(11) +#define HSIO_PLL5G_CFG3_FORCE_ENA BIT(10) +#define HSIO_PLL5G_CFG3_FORCE_CP BIT(9) +#define HSIO_PLL5G_CFG3_FBDIVSEL_TST_ENA BIT(8) +#define HSIO_PLL5G_CFG3_FBDIVSEL(x) ((x) & GENMASK(7, 0)) +#define HSIO_PLL5G_CFG3_FBDIVSEL_M GENMASK(7, 0) + +#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL(x) (((x) << 16) & GENMASK(23, 16)) +#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL_M GENMASK(23, 16) +#define HSIO_PLL5G_CFG4_IB_BIAS_CTRL_X(x) (((x) & GENMASK(23, 16)) >> 16) +#define HSIO_PLL5G_CFG4_IB_CTRL(x) ((x) & GENMASK(15, 0)) +#define HSIO_PLL5G_CFG4_IB_CTRL_M GENMASK(15, 0) + +#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL(x) (((x) << 16) & GENMASK(23, 16)) +#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL_M GENMASK(23, 16) +#define HSIO_PLL5G_CFG5_OB_BIAS_CTRL_X(x) (((x) & GENMASK(23, 16)) >> 16) +#define HSIO_PLL5G_CFG5_OB_CTRL(x) ((x) & GENMASK(15, 0)) +#define HSIO_PLL5G_CFG5_OB_CTRL_M GENMASK(15, 0) + +#define HSIO_PLL5G_CFG6_REFCLK_SEL_SRC BIT(23) +#define HSIO_PLL5G_CFG6_REFCLK_SEL(x) (((x) << 20) & GENMASK(22, 20)) +#define HSIO_PLL5G_CFG6_REFCLK_SEL_M GENMASK(22, 20) +#define HSIO_PLL5G_CFG6_REFCLK_SEL_X(x) (((x) & GENMASK(22, 20)) >> 20) +#define HSIO_PLL5G_CFG6_REFCLK_SRC BIT(19) +#define HSIO_PLL5G_CFG6_POR_DEL_SEL(x) (((x) << 16) & GENMASK(17, 16)) +#define HSIO_PLL5G_CFG6_POR_DEL_SEL_M GENMASK(17, 16) +#define HSIO_PLL5G_CFG6_POR_DEL_SEL_X(x) (((x) & GENMASK(17, 16)) >> 16) +#define HSIO_PLL5G_CFG6_DIV125REF_SEL(x) (((x) << 8) & GENMASK(15, 8)) +#define HSIO_PLL5G_CFG6_DIV125REF_SEL_M GENMASK(15, 8) +#define HSIO_PLL5G_CFG6_DIV125REF_SEL_X(x) (((x) & GENMASK(15, 8)) >> 8) +#define HSIO_PLL5G_CFG6_ENA_REFCLKC2 BIT(7) +#define HSIO_PLL5G_CFG6_ENA_FBCLKC2 BIT(6) +#define HSIO_PLL5G_CFG6_DDR_CLK_DIV(x) ((x) & GENMASK(5, 0)) +#define HSIO_PLL5G_CFG6_DDR_CLK_DIV_M GENMASK(5, 0) + +#define HSIO_PLL5G_STATUS0_RANGE_LIM BIT(12) +#define HSIO_PLL5G_STATUS0_OUT_OF_RANGE_ERR BIT(11) +#define HSIO_PLL5G_STATUS0_CALIBRATION_ERR BIT(10) +#define HSIO_PLL5G_STATUS0_CALIBRATION_DONE BIT(9) +#define HSIO_PLL5G_STATUS0_READBACK_DATA(x) (((x) << 1) & GENMASK(8, 1)) +#define HSIO_PLL5G_STATUS0_READBACK_DATA_M GENMASK(8, 1) +#define HSIO_PLL5G_STATUS0_READBACK_DATA_X(x) (((x) & GENMASK(8, 1)) >> 1) +#define HSIO_PLL5G_STATUS0_LOCK_STATUS BIT(0) + +#define HSIO_PLL5G_STATUS1_SIG_DEL(x) (((x) << 21) & GENMASK(28, 21)) +#define HSIO_PLL5G_STATUS1_SIG_DEL_M GENMASK(28, 21) +#define HSIO_PLL5G_STATUS1_SIG_DEL_X(x) (((x) & GENMASK(28, 21)) >> 21) +#define HSIO_PLL5G_STATUS1_GAIN_STAT(x) (((x) << 16) & GENMASK(20, 16)) +#define HSIO_PLL5G_STATUS1_GAIN_STAT_M GENMASK(20, 16) +#define HSIO_PLL5G_STATUS1_GAIN_STAT_X(x) (((x) & GENMASK(20, 16)) >> 16) +#define HSIO_PLL5G_STATUS1_FBCNT_DIF(x) (((x) << 4) & GENMASK(13, 4)) +#define HSIO_PLL5G_STATUS1_FBCNT_DIF_M GENMASK(13, 4) +#define HSIO_PLL5G_STATUS1_FBCNT_DIF_X(x) (((x) & GENMASK(13, 4)) >> 4) +#define HSIO_PLL5G_STATUS1_FSM_STAT(x) (((x) << 1) & GENMASK(3, 1)) +#define HSIO_PLL5G_STATUS1_FSM_STAT_M GENMASK(3, 1) +#define HSIO_PLL5G_STATUS1_FSM_STAT_X(x) (((x) & GENMASK(3, 1)) >> 1) +#define HSIO_PLL5G_STATUS1_FSM_LOCK BIT(0) + +#define HSIO_PLL5G_BIST_CFG0_PLLB_START_BIST BIT(31) +#define HSIO_PLL5G_BIST_CFG0_PLLB_MEAS_MODE BIT(30) +#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT(x) (((x) << 20) & GENMASK(23, 20)) +#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT_M GENMASK(23, 20) +#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_REPEAT_X(x) (((x) & GENMASK(23, 20)) >> 20) +#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT(x) (((x) << 16) & GENMASK(19, 16)) +#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT_M GENMASK(19, 16) +#define HSIO_PLL5G_BIST_CFG0_PLLB_LOCK_UNCERT_X(x) (((x) & GENMASK(19, 16)) >> 16) +#define HSIO_PLL5G_BIST_CFG0_PLLB_DIV_FACTOR_PRE(x) ((x) & GENMASK(15, 0)) +#define HSIO_PLL5G_BIST_CFG0_PLLB_DIV_FACTOR_PRE_M GENMASK(15, 0) + +#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT(x) (((x) << 4) & GENMASK(7, 4)) +#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT_M GENMASK(7, 4) +#define HSIO_PLL5G_BIST_STAT0_PLLB_FSM_STAT_X(x) (((x) & GENMASK(7, 4)) >> 4) +#define HSIO_PLL5G_BIST_STAT0_PLLB_BUSY BIT(2) +#define HSIO_PLL5G_BIST_STAT0_PLLB_DONE_N BIT(1) +#define HSIO_PLL5G_BIST_STAT0_PLLB_FAIL BIT(0) + +#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT(x) (((x) << 16) & GENMASK(31, 16)) +#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT_M GENMASK(31, 16) +#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_OUT_X(x) (((x) & GENMASK(31, 16)) >> 16) +#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_REF_DIFF(x) ((x) & GENMASK(15, 0)) +#define HSIO_PLL5G_BIST_STAT1_PLLB_CNT_REF_DIFF_M GENMASK(15, 0) + +#define HSIO_RCOMP_CFG0_PWD_ENA BIT(13) +#define HSIO_RCOMP_CFG0_RUN_CAL BIT(12) +#define HSIO_RCOMP_CFG0_SPEED_SEL(x) (((x) << 10) & GENMASK(11, 10)) +#define HSIO_RCOMP_CFG0_SPEED_SEL_M GENMASK(11, 10) +#define HSIO_RCOMP_CFG0_SPEED_SEL_X(x) (((x) & GENMASK(11, 10)) >> 10) +#define HSIO_RCOMP_CFG0_MODE_SEL(x) (((x) << 8) & GENMASK(9, 8)) +#define HSIO_RCOMP_CFG0_MODE_SEL_M GENMASK(9, 8) +#define HSIO_RCOMP_CFG0_MODE_SEL_X(x) (((x) & GENMASK(9, 8)) >> 8) +#define HSIO_RCOMP_CFG0_FORCE_ENA BIT(4) +#define HSIO_RCOMP_CFG0_RCOMP_VAL(x) ((x) & GENMASK(3, 0)) +#define HSIO_RCOMP_CFG0_RCOMP_VAL_M GENMASK(3, 0) + +#define HSIO_RCOMP_STATUS_BUSY BIT(12) +#define HSIO_RCOMP_STATUS_DELTA_ALERT BIT(7) +#define HSIO_RCOMP_STATUS_RCOMP(x) ((x) & GENMASK(3, 0)) +#define HSIO_RCOMP_STATUS_RCOMP_M GENMASK(3, 0) + +#define HSIO_SYNC_ETH_CFG_RSZ 0x4 + +#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC(x) (((x) << 4) & GENMASK(7, 4)) +#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC_M GENMASK(7, 4) +#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_SRC_X(x) (((x) & GENMASK(7, 4)) >> 4) +#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV(x) (((x) << 1) & GENMASK(3, 1)) +#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV_M GENMASK(3, 1) +#define HSIO_SYNC_ETH_CFG_SEL_RECO_CLK_DIV_X(x) (((x) & GENMASK(3, 1)) >> 1) +#define HSIO_SYNC_ETH_CFG_RECO_CLK_ENA BIT(0) + +#define HSIO_SYNC_ETH_PLL_CFG_PLL_AUTO_SQUELCH_ENA BIT(0) + +#define HSIO_S1G_DES_CFG_DES_PHS_CTRL(x) (((x) << 13) & GENMASK(16, 13)) +#define HSIO_S1G_DES_CFG_DES_PHS_CTRL_M GENMASK(16, 13) +#define HSIO_S1G_DES_CFG_DES_PHS_CTRL_X(x) (((x) & GENMASK(16, 13)) >> 13) +#define HSIO_S1G_DES_CFG_DES_CPMD_SEL(x) (((x) << 11) & GENMASK(12, 11)) +#define HSIO_S1G_DES_CFG_DES_CPMD_SEL_M GENMASK(12, 11) +#define HSIO_S1G_DES_CFG_DES_CPMD_SEL_X(x) (((x) & GENMASK(12, 11)) >> 11) +#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL(x) (((x) << 8) & GENMASK(10, 8)) +#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL_M GENMASK(10, 8) +#define HSIO_S1G_DES_CFG_DES_MBTR_CTRL_X(x) (((x) & GENMASK(10, 8)) >> 8) +#define HSIO_S1G_DES_CFG_DES_BW_ANA(x) (((x) << 5) & GENMASK(7, 5)) +#define HSIO_S1G_DES_CFG_DES_BW_ANA_M GENMASK(7, 5) +#define HSIO_S1G_DES_CFG_DES_BW_ANA_X(x) (((x) & GENMASK(7, 5)) >> 5) +#define HSIO_S1G_DES_CFG_DES_SWAP_ANA BIT(4) +#define HSIO_S1G_DES_CFG_DES_BW_HYST(x) (((x) << 1) & GENMASK(3, 1)) +#define HSIO_S1G_DES_CFG_DES_BW_HYST_M GENMASK(3, 1) +#define HSIO_S1G_DES_CFG_DES_BW_HYST_X(x) (((x) & GENMASK(3, 1)) >> 1) +#define HSIO_S1G_DES_CFG_DES_SWAP_HYST BIT(0) + +#define HSIO_S1G_IB_CFG_IB_FX100_ENA BIT(27) +#define HSIO_S1G_IB_CFG_ACJTAG_HYST(x) (((x) << 24) & GENMASK(26, 24)) +#define HSIO_S1G_IB_CFG_ACJTAG_HYST_M GENMASK(26, 24) +#define HSIO_S1G_IB_CFG_ACJTAG_HYST_X(x) (((x) & GENMASK(26, 24)) >> 24) +#define HSIO_S1G_IB_CFG_IB_DET_LEV(x) (((x) << 19) & GENMASK(21, 19)) +#define HSIO_S1G_IB_CFG_IB_DET_LEV_M GENMASK(21, 19) +#define HSIO_S1G_IB_CFG_IB_DET_LEV_X(x) (((x) & GENMASK(21, 19)) >> 19) +#define HSIO_S1G_IB_CFG_IB_HYST_LEV BIT(14) +#define HSIO_S1G_IB_CFG_IB_ENA_CMV_TERM BIT(13) +#define HSIO_S1G_IB_CFG_IB_ENA_DC_COUPLING BIT(12) +#define HSIO_S1G_IB_CFG_IB_ENA_DETLEV BIT(11) +#define HSIO_S1G_IB_CFG_IB_ENA_HYST BIT(10) +#define HSIO_S1G_IB_CFG_IB_ENA_OFFSET_COMP BIT(9) +#define HSIO_S1G_IB_CFG_IB_EQ_GAIN(x) (((x) << 6) & GENMASK(8, 6)) +#define HSIO_S1G_IB_CFG_IB_EQ_GAIN_M GENMASK(8, 6) +#define HSIO_S1G_IB_CFG_IB_EQ_GAIN_X(x) (((x) & GENMASK(8, 6)) >> 6) +#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ(x) (((x) << 4) & GENMASK(5, 4)) +#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ_M GENMASK(5, 4) +#define HSIO_S1G_IB_CFG_IB_SEL_CORNER_FREQ_X(x) (((x) & GENMASK(5, 4)) >> 4) +#define HSIO_S1G_IB_CFG_IB_RESISTOR_CTRL(x) ((x) & GENMASK(3, 0)) +#define HSIO_S1G_IB_CFG_IB_RESISTOR_CTRL_M GENMASK(3, 0) + +#define HSIO_S1G_OB_CFG_OB_SLP(x) (((x) << 17) & GENMASK(18, 17)) +#define HSIO_S1G_OB_CFG_OB_SLP_M GENMASK(18, 17) +#define HSIO_S1G_OB_CFG_OB_SLP_X(x) (((x) & GENMASK(18, 17)) >> 17) +#define HSIO_S1G_OB_CFG_OB_AMP_CTRL(x) (((x) << 13) & GENMASK(16, 13)) +#define HSIO_S1G_OB_CFG_OB_AMP_CTRL_M GENMASK(16, 13) +#define HSIO_S1G_OB_CFG_OB_AMP_CTRL_X(x) (((x) & GENMASK(16, 13)) >> 13) +#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL(x) (((x) << 10) & GENMASK(12, 10)) +#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL_M GENMASK(12, 10) +#define HSIO_S1G_OB_CFG_OB_CMM_BIAS_CTRL_X(x) (((x) & GENMASK(12, 10)) >> 10) +#define HSIO_S1G_OB_CFG_OB_DIS_VCM_CTRL BIT(9) +#define HSIO_S1G_OB_CFG_OB_EN_MEAS_VREG BIT(8) +#define HSIO_S1G_OB_CFG_OB_VCM_CTRL(x) (((x) << 4) & GENMASK(7, 4)) +#define HSIO_S1G_OB_CFG_OB_VCM_CTRL_M GENMASK(7, 4) +#define HSIO_S1G_OB_CFG_OB_VCM_CTRL_X(x) (((x) & GENMASK(7, 4)) >> 4) +#define HSIO_S1G_OB_CFG_OB_RESISTOR_CTRL(x) ((x) & GENMASK(3, 0)) +#define HSIO_S1G_OB_CFG_OB_RESISTOR_CTRL_M GENMASK(3, 0) + +#define HSIO_S1G_SER_CFG_SER_IDLE BIT(9) +#define HSIO_S1G_SER_CFG_SER_DEEMPH BIT(8) +#define HSIO_S1G_SER_CFG_SER_CPMD_SEL BIT(7) +#define HSIO_S1G_SER_CFG_SER_SWAP_CPMD BIT(6) +#define HSIO_S1G_SER_CFG_SER_ALISEL(x) (((x) << 4) & GENMASK(5, 4)) +#define HSIO_S1G_SER_CFG_SER_ALISEL_M GENMASK(5, 4) +#define HSIO_S1G_SER_CFG_SER_ALISEL_X(x) (((x) & GENMASK(5, 4)) >> 4) +#define HSIO_S1G_SER_CFG_SER_ENHYS BIT(3) +#define HSIO_S1G_SER_CFG_SER_BIG_WIN BIT(2) +#define HSIO_S1G_SER_CFG_SER_EN_WIN BIT(1) +#define HSIO_S1G_SER_CFG_SER_ENALI BIT(0) + +#define HSIO_S1G_COMMON_CFG_SYS_RST BIT(31) +#define HSIO_S1G_COMMON_CFG_SE_AUTO_SQUELCH_ENA BIT(21) +#define HSIO_S1G_COMMON_CFG_ENA_LANE BIT(18) +#define HSIO_S1G_COMMON_CFG_PWD_RX BIT(17) +#define HSIO_S1G_COMMON_CFG_PWD_TX BIT(16) +#define HSIO_S1G_COMMON_CFG_LANE_CTRL(x) (((x) << 13) & GENMASK(15, 13)) +#define HSIO_S1G_COMMON_CFG_LANE_CTRL_M GENMASK(15, 13) +#define HSIO_S1G_COMMON_CFG_LANE_CTRL_X(x) (((x) & GENMASK(15, 13)) >> 13) +#define HSIO_S1G_COMMON_CFG_ENA_DIRECT BIT(12) +#define HSIO_S1G_COMMON_CFG_ENA_ELOOP BIT(11) +#define HSIO_S1G_COMMON_CFG_ENA_FLOOP BIT(10) +#define HSIO_S1G_COMMON_CFG_ENA_ILOOP BIT(9) +#define HSIO_S1G_COMMON_CFG_ENA_PLOOP BIT(8) +#define HSIO_S1G_COMMON_CFG_HRATE BIT(7) +#define HSIO_S1G_COMMON_CFG_IF_MODE BIT(0) + +#define HSIO_S1G_PLL_CFG_PLL_ENA_FB_DIV2 BIT(22) +#define HSIO_S1G_PLL_CFG_PLL_ENA_RC_DIV2 BIT(21) +#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA(x) (((x) << 8) & GENMASK(15, 8)) +#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA_M GENMASK(15, 8) +#define HSIO_S1G_PLL_CFG_PLL_FSM_CTRL_DATA_X(x) (((x) & GENMASK(15, 8)) >> 8) +#define HSIO_S1G_PLL_CFG_PLL_FSM_ENA BIT(7) +#define HSIO_S1G_PLL_CFG_PLL_FSM_FORCE_SET_ENA BIT(6) +#define HSIO_S1G_PLL_CFG_PLL_FSM_OOR_RECAL_ENA BIT(5) +#define HSIO_S1G_PLL_CFG_PLL_RB_DATA_SEL BIT(3) + +#define HSIO_S1G_PLL_STATUS_PLL_CAL_NOT_DONE BIT(12) +#define HSIO_S1G_PLL_STATUS_PLL_CAL_ERR BIT(11) +#define HSIO_S1G_PLL_STATUS_PLL_OUT_OF_RANGE_ERR BIT(10) +#define HSIO_S1G_PLL_STATUS_PLL_RB_DATA(x) ((x) & GENMASK(7, 0)) +#define HSIO_S1G_PLL_STATUS_PLL_RB_DATA_M GENMASK(7, 0) + +#define HSIO_S1G_DFT_CFG0_LAZYBIT BIT(31) +#define HSIO_S1G_DFT_CFG0_INV_DIS BIT(23) +#define HSIO_S1G_DFT_CFG0_PRBS_SEL(x) (((x) << 20) & GENMASK(21, 20)) +#define HSIO_S1G_DFT_CFG0_PRBS_SEL_M GENMASK(21, 20) +#define HSIO_S1G_DFT_CFG0_PRBS_SEL_X(x) (((x) & GENMASK(21, 20)) >> 20) +#define HSIO_S1G_DFT_CFG0_TEST_MODE(x) (((x) << 16) & GENMASK(18, 16)) +#define HSIO_S1G_DFT_CFG0_TEST_MODE_M GENMASK(18, 16) +#define HSIO_S1G_DFT_CFG0_TEST_MODE_X(x) (((x) & GENMASK(18, 16)) >> 16) +#define HSIO_S1G_DFT_CFG0_RX_PHS_CORR_DIS BIT(4) +#define HSIO_S1G_DFT_CFG0_RX_PDSENS_ENA BIT(3) +#define HSIO_S1G_DFT_CFG0_RX_DFT_ENA BIT(2) +#define HSIO_S1G_DFT_CFG0_TX_DFT_ENA BIT(0) + +#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL(x) (((x) << 8) & GENMASK(17, 8)) +#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL_M GENMASK(17, 8) +#define HSIO_S1G_DFT_CFG1_TX_JITTER_AMPL_X(x) (((x) & GENMASK(17, 8)) >> 8) +#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ(x) (((x) << 4) & GENMASK(7, 4)) +#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ_M GENMASK(7, 4) +#define HSIO_S1G_DFT_CFG1_TX_STEP_FREQ_X(x) (((x) & GENMASK(7, 4)) >> 4) +#define HSIO_S1G_DFT_CFG1_TX_JI_ENA BIT(3) +#define HSIO_S1G_DFT_CFG1_TX_WAVEFORM_SEL BIT(2) +#define HSIO_S1G_DFT_CFG1_TX_FREQOFF_DIR BIT(1) +#define HSIO_S1G_DFT_CFG1_TX_FREQOFF_ENA BIT(0) + +#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL(x) (((x) << 8) & GENMASK(17, 8)) +#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL_M GENMASK(17, 8) +#define HSIO_S1G_DFT_CFG2_RX_JITTER_AMPL_X(x) (((x) & GENMASK(17, 8)) >> 8) +#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ(x) (((x) << 4) & GENMASK(7, 4)) +#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ_M GENMASK(7, 4) +#define HSIO_S1G_DFT_CFG2_RX_STEP_FREQ_X(x) (((x) & GENMASK(7, 4)) >> 4) +#define HSIO_S1G_DFT_CFG2_RX_JI_ENA BIT(3) +#define HSIO_S1G_DFT_CFG2_RX_WAVEFORM_SEL BIT(2) +#define HSIO_S1G_DFT_CFG2_RX_FREQOFF_DIR BIT(1) +#define HSIO_S1G_DFT_CFG2_RX_FREQOFF_ENA BIT(0) + +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_ENA BIT(20) +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH(x) (((x) << 16) & GENMASK(17, 16)) +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_M GENMASK(17, 16) +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_X(x) (((x) & GENMASK(17, 16)) >> 16) +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH(x) (((x) << 8) & GENMASK(15, 8)) +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_M GENMASK(15, 8) +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_X(x) (((x) & GENMASK(15, 8)) >> 8) +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_LOW(x) ((x) & GENMASK(7, 0)) +#define HSIO_S1G_RC_PLL_BIST_CFG_PLL_BIST_LOW_M GENMASK(7, 0) + +#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE(x) (((x) << 11) & GENMASK(12, 11)) +#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE_M GENMASK(12, 11) +#define HSIO_S1G_MISC_CFG_DES_100FX_KICK_MODE_X(x) (((x) & GENMASK(12, 11)) >> 11) +#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_SWAP BIT(10) +#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_MODE BIT(9) +#define HSIO_S1G_MISC_CFG_DES_100FX_CPMD_ENA BIT(8) +#define HSIO_S1G_MISC_CFG_RX_LPI_MODE_ENA BIT(5) +#define HSIO_S1G_MISC_CFG_TX_LPI_MODE_ENA BIT(4) +#define HSIO_S1G_MISC_CFG_RX_DATA_INV_ENA BIT(3) +#define HSIO_S1G_MISC_CFG_TX_DATA_INV_ENA BIT(2) +#define HSIO_S1G_MISC_CFG_LANE_RST BIT(0) + +#define HSIO_S1G_DFT_STATUS_PLL_BIST_NOT_DONE BIT(7) +#define HSIO_S1G_DFT_STATUS_PLL_BIST_FAILED BIT(6) +#define HSIO_S1G_DFT_STATUS_PLL_BIST_TIMEOUT_ERR BIT(5) +#define HSIO_S1G_DFT_STATUS_BIST_ACTIVE BIT(3) +#define HSIO_S1G_DFT_STATUS_BIST_NOSYNC BIT(2) +#define HSIO_S1G_DFT_STATUS_BIST_COMPLETE_N BIT(1) +#define HSIO_S1G_DFT_STATUS_BIST_ERROR BIT(0) + +#define HSIO_S1G_MISC_STATUS_DES_100FX_PHASE_SEL BIT(0) + +#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_WR_ONE_SHOT BIT(31) +#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_RD_ONE_SHOT BIT(30) +#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_ADDR(x) ((x) & GENMASK(8, 0)) +#define HSIO_MCB_S1G_ADDR_CFG_SERDES1G_ADDR_M GENMASK(8, 0) + +#define HSIO_S6G_DIG_CFG_GP(x) (((x) << 16) & GENMASK(18, 16)) +#define HSIO_S6G_DIG_CFG_GP_M GENMASK(18, 16) +#define HSIO_S6G_DIG_CFG_GP_X(x) (((x) & GENMASK(18, 16)) >> 16) +#define HSIO_S6G_DIG_CFG_TX_BIT_DOUBLING_MODE_ENA BIT(7) +#define HSIO_S6G_DIG_CFG_SIGDET_TESTMODE BIT(6) +#define HSIO_S6G_DIG_CFG_SIGDET_AST(x) (((x) << 3) & GENMASK(5, 3)) +#define HSIO_S6G_DIG_CFG_SIGDET_AST_M GENMASK(5, 3) +#define HSIO_S6G_DIG_CFG_SIGDET_AST_X(x) (((x) & GENMASK(5, 3)) >> 3) +#define HSIO_S6G_DIG_CFG_SIGDET_DST(x) ((x) & GENMASK(2, 0)) +#define HSIO_S6G_DIG_CFG_SIGDET_DST_M GENMASK(2, 0) + +#define HSIO_S6G_DFT_CFG0_LAZYBIT BIT(31) +#define HSIO_S6G_DFT_CFG0_INV_DIS BIT(23) +#define HSIO_S6G_DFT_CFG0_PRBS_SEL(x) (((x) << 20) & GENMASK(21, 20)) +#define HSIO_S6G_DFT_CFG0_PRBS_SEL_M GENMASK(21, 20) +#define HSIO_S6G_DFT_CFG0_PRBS_SEL_X(x) (((x) & GENMASK(21, 20)) >> 20) +#define HSIO_S6G_DFT_CFG0_TEST_MODE(x) (((x) << 16) & GENMASK(18, 16)) +#define HSIO_S6G_DFT_CFG0_TEST_MODE_M GENMASK(18, 16) +#define HSIO_S6G_DFT_CFG0_TEST_MODE_X(x) (((x) & GENMASK(18, 16)) >> 16) +#define HSIO_S6G_DFT_CFG0_RX_PHS_CORR_DIS BIT(4) +#define HSIO_S6G_DFT_CFG0_RX_PDSENS_ENA BIT(3) +#define HSIO_S6G_DFT_CFG0_RX_DFT_ENA BIT(2) +#define HSIO_S6G_DFT_CFG0_TX_DFT_ENA BIT(0) + +#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL(x) (((x) << 8) & GENMASK(17, 8)) +#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL_M GENMASK(17, 8) +#define HSIO_S6G_DFT_CFG1_TX_JITTER_AMPL_X(x) (((x) & GENMASK(17, 8)) >> 8) +#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ(x) (((x) << 4) & GENMASK(7, 4)) +#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ_M GENMASK(7, 4) +#define HSIO_S6G_DFT_CFG1_TX_STEP_FREQ_X(x) (((x) & GENMASK(7, 4)) >> 4) +#define HSIO_S6G_DFT_CFG1_TX_JI_ENA BIT(3) +#define HSIO_S6G_DFT_CFG1_TX_WAVEFORM_SEL BIT(2) +#define HSIO_S6G_DFT_CFG1_TX_FREQOFF_DIR BIT(1) +#define HSIO_S6G_DFT_CFG1_TX_FREQOFF_ENA BIT(0) + +#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL(x) (((x) << 8) & GENMASK(17, 8)) +#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL_M GENMASK(17, 8) +#define HSIO_S6G_DFT_CFG2_RX_JITTER_AMPL_X(x) (((x) & GENMASK(17, 8)) >> 8) +#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ(x) (((x) << 4) & GENMASK(7, 4)) +#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ_M GENMASK(7, 4) +#define HSIO_S6G_DFT_CFG2_RX_STEP_FREQ_X(x) (((x) & GENMASK(7, 4)) >> 4) +#define HSIO_S6G_DFT_CFG2_RX_JI_ENA BIT(3) +#define HSIO_S6G_DFT_CFG2_RX_WAVEFORM_SEL BIT(2) +#define HSIO_S6G_DFT_CFG2_RX_FREQOFF_DIR BIT(1) +#define HSIO_S6G_DFT_CFG2_RX_FREQOFF_ENA BIT(0) + +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_ENA BIT(20) +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH(x) (((x) << 16) & GENMASK(19, 16)) +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_M GENMASK(19, 16) +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_FBS_HIGH_X(x) (((x) & GENMASK(19, 16)) >> 16) +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH(x) (((x) << 8) & GENMASK(15, 8)) +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_M GENMASK(15, 8) +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_HIGH_X(x) (((x) & GENMASK(15, 8)) >> 8) +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_LOW(x) ((x) & GENMASK(7, 0)) +#define HSIO_S6G_RC_PLL_BIST_CFG_PLL_BIST_LOW_M GENMASK(7, 0) + +#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK(x) (((x) << 13) & GENMASK(14, 13)) +#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK_M GENMASK(14, 13) +#define HSIO_S6G_MISC_CFG_SEL_RECO_CLK_X(x) (((x) & GENMASK(14, 13)) >> 13) +#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE(x) (((x) << 11) & GENMASK(12, 11)) +#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE_M GENMASK(12, 11) +#define HSIO_S6G_MISC_CFG_DES_100FX_KICK_MODE_X(x) (((x) & GENMASK(12, 11)) >> 11) +#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_SWAP BIT(10) +#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_MODE BIT(9) +#define HSIO_S6G_MISC_CFG_DES_100FX_CPMD_ENA BIT(8) +#define HSIO_S6G_MISC_CFG_RX_BUS_FLIP_ENA BIT(7) +#define HSIO_S6G_MISC_CFG_TX_BUS_FLIP_ENA BIT(6) +#define HSIO_S6G_MISC_CFG_RX_LPI_MODE_ENA BIT(5) +#define HSIO_S6G_MISC_CFG_TX_LPI_MODE_ENA BIT(4) +#define HSIO_S6G_MISC_CFG_RX_DATA_INV_ENA BIT(3) +#define HSIO_S6G_MISC_CFG_TX_DATA_INV_ENA BIT(2) +#define HSIO_S6G_MISC_CFG_LANE_RST BIT(0) + +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0(x) (((x) << 23) & GENMASK(28, 23)) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0_M GENMASK(28, 23) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST0_X(x) (((x) & GENMASK(28, 23)) >> 23) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1(x) (((x) << 18) & GENMASK(22, 18)) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1_M GENMASK(22, 18) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_POST1_X(x) (((x) & GENMASK(22, 18)) >> 18) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC(x) (((x) << 13) & GENMASK(17, 13)) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC_M GENMASK(17, 13) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_PREC_X(x) (((x) & GENMASK(17, 13)) >> 13) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS(x) (((x) << 6) & GENMASK(8, 6)) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS_M GENMASK(8, 6) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_ENA_CAS_X(x) (((x) & GENMASK(8, 6)) >> 6) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_LEV(x) ((x) & GENMASK(5, 0)) +#define HSIO_S6G_OB_ANEG_CFG_AN_OB_LEV_M GENMASK(5, 0) + +#define HSIO_S6G_DFT_STATUS_PRBS_SYNC_STAT BIT(8) +#define HSIO_S6G_DFT_STATUS_PLL_BIST_NOT_DONE BIT(7) +#define HSIO_S6G_DFT_STATUS_PLL_BIST_FAILED BIT(6) +#define HSIO_S6G_DFT_STATUS_PLL_BIST_TIMEOUT_ERR BIT(5) +#define HSIO_S6G_DFT_STATUS_BIST_ACTIVE BIT(3) +#define HSIO_S6G_DFT_STATUS_BIST_NOSYNC BIT(2) +#define HSIO_S6G_DFT_STATUS_BIST_COMPLETE_N BIT(1) +#define HSIO_S6G_DFT_STATUS_BIST_ERROR BIT(0) + +#define HSIO_S6G_MISC_STATUS_DES_100FX_PHASE_SEL BIT(0) + +#define HSIO_S6G_DES_CFG_DES_PHS_CTRL(x) (((x) << 13) & GENMASK(16, 13)) +#define HSIO_S6G_DES_CFG_DES_PHS_CTRL_M GENMASK(16, 13) +#define HSIO_S6G_DES_CFG_DES_PHS_CTRL_X(x) (((x) & GENMASK(16, 13)) >> 13) +#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL(x) (((x) << 10) & GENMASK(12, 10)) +#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL_M GENMASK(12, 10) +#define HSIO_S6G_DES_CFG_DES_MBTR_CTRL_X(x) (((x) & GENMASK(12, 10)) >> 10) +#define HSIO_S6G_DES_CFG_DES_CPMD_SEL(x) (((x) << 8) & GENMASK(9, 8)) +#define HSIO_S6G_DES_CFG_DES_CPMD_SEL_M GENMASK(9, 8) +#define HSIO_S6G_DES_CFG_DES_CPMD_SEL_X(x) (((x) & GENMASK(9, 8)) >> 8) +#define HSIO_S6G_DES_CFG_DES_BW_HYST(x) (((x) << 5) & GENMASK(7, 5)) +#define HSIO_S6G_DES_CFG_DES_BW_HYST_M GENMASK(7, 5) +#define HSIO_S6G_DES_CFG_DES_BW_HYST_X(x) (((x) & GENMASK(7, 5)) >> 5) +#define HSIO_S6G_DES_CFG_DES_SWAP_HYST BIT(4) +#define HSIO_S6G_DES_CFG_DES_BW_ANA(x) (((x) << 1) & GENMASK(3, 1)) +#define HSIO_S6G_DES_CFG_DES_BW_ANA_M GENMASK(3, 1) +#define HSIO_S6G_DES_CFG_DES_BW_ANA_X(x) (((x) & GENMASK(3, 1)) >> 1) +#define HSIO_S6G_DES_CFG_DES_SWAP_ANA BIT(0) + +#define HSIO_S6G_IB_CFG_IB_SOFSI(x) (((x) << 29) & GENMASK(30, 29)) +#define HSIO_S6G_IB_CFG_IB_SOFSI_M GENMASK(30, 29) +#define HSIO_S6G_IB_CFG_IB_SOFSI_X(x) (((x) & GENMASK(30, 29)) >> 29) +#define HSIO_S6G_IB_CFG_IB_VBULK_SEL BIT(28) +#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ(x) (((x) << 24) & GENMASK(27, 24)) +#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ_M GENMASK(27, 24) +#define HSIO_S6G_IB_CFG_IB_RTRM_ADJ_X(x) (((x) & GENMASK(27, 24)) >> 24) +#define HSIO_S6G_IB_CFG_IB_ICML_ADJ(x) (((x) << 20) & GENMASK(23, 20)) +#define HSIO_S6G_IB_CFG_IB_ICML_ADJ_M GENMASK(23, 20) +#define HSIO_S6G_IB_CFG_IB_ICML_ADJ_X(x) (((x) & GENMASK(23, 20)) >> 20) +#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL(x) (((x) << 18) & GENMASK(19, 18)) +#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL_M GENMASK(19, 18) +#define HSIO_S6G_IB_CFG_IB_TERM_MODE_SEL_X(x) (((x) & GENMASK(19, 18)) >> 18) +#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL(x) (((x) << 15) & GENMASK(17, 15)) +#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL_M GENMASK(17, 15) +#define HSIO_S6G_IB_CFG_IB_SIG_DET_CLK_SEL_X(x) (((x) & GENMASK(17, 15)) >> 15) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP(x) (((x) << 13) & GENMASK(14, 13)) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP_M GENMASK(14, 13) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_HP_X(x) (((x) & GENMASK(14, 13)) >> 13) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID(x) (((x) << 11) & GENMASK(12, 11)) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID_M GENMASK(12, 11) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_MID_X(x) (((x) & GENMASK(12, 11)) >> 11) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP(x) (((x) << 9) & GENMASK(10, 9)) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP_M GENMASK(10, 9) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_LP_X(x) (((x) & GENMASK(10, 9)) >> 9) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET(x) (((x) << 7) & GENMASK(8, 7)) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET_M GENMASK(8, 7) +#define HSIO_S6G_IB_CFG_IB_REG_PAT_SEL_OFFSET_X(x) (((x) & GENMASK(8, 7)) >> 7) +#define HSIO_S6G_IB_CFG_IB_ANA_TEST_ENA BIT(6) +#define HSIO_S6G_IB_CFG_IB_SIG_DET_ENA BIT(5) +#define HSIO_S6G_IB_CFG_IB_CONCUR BIT(4) +#define HSIO_S6G_IB_CFG_IB_CAL_ENA BIT(3) +#define HSIO_S6G_IB_CFG_IB_SAM_ENA BIT(2) +#define HSIO_S6G_IB_CFG_IB_EQZ_ENA BIT(1) +#define HSIO_S6G_IB_CFG_IB_REG_ENA BIT(0) + +#define HSIO_S6G_IB_CFG1_IB_TJTAG(x) (((x) << 17) & GENMASK(21, 17)) +#define HSIO_S6G_IB_CFG1_IB_TJTAG_M GENMASK(21, 17) +#define HSIO_S6G_IB_CFG1_IB_TJTAG_X(x) (((x) & GENMASK(21, 17)) >> 17) +#define HSIO_S6G_IB_CFG1_IB_TSDET(x) (((x) << 12) & GENMASK(16, 12)) +#define HSIO_S6G_IB_CFG1_IB_TSDET_M GENMASK(16, 12) +#define HSIO_S6G_IB_CFG1_IB_TSDET_X(x) (((x) & GENMASK(16, 12)) >> 12) +#define HSIO_S6G_IB_CFG1_IB_SCALY(x) (((x) << 8) & GENMASK(11, 8)) +#define HSIO_S6G_IB_CFG1_IB_SCALY_M GENMASK(11, 8) +#define HSIO_S6G_IB_CFG1_IB_SCALY_X(x) (((x) & GENMASK(11, 8)) >> 8) +#define HSIO_S6G_IB_CFG1_IB_FILT_HP BIT(7) +#define HSIO_S6G_IB_CFG1_IB_FILT_MID BIT(6) +#define HSIO_S6G_IB_CFG1_IB_FILT_LP BIT(5) +#define HSIO_S6G_IB_CFG1_IB_FILT_OFFSET BIT(4) +#define HSIO_S6G_IB_CFG1_IB_FRC_HP BIT(3) +#define HSIO_S6G_IB_CFG1_IB_FRC_MID BIT(2) +#define HSIO_S6G_IB_CFG1_IB_FRC_LP BIT(1) +#define HSIO_S6G_IB_CFG1_IB_FRC_OFFSET BIT(0) + +#define HSIO_S6G_IB_CFG2_IB_TINFV(x) (((x) << 27) & GENMASK(29, 27)) +#define HSIO_S6G_IB_CFG2_IB_TINFV_M GENMASK(29, 27) +#define HSIO_S6G_IB_CFG2_IB_TINFV_X(x) (((x) & GENMASK(29, 27)) >> 27) +#define HSIO_S6G_IB_CFG2_IB_OINFI(x) (((x) << 22) & GENMASK(26, 22)) +#define HSIO_S6G_IB_CFG2_IB_OINFI_M GENMASK(26, 22) +#define HSIO_S6G_IB_CFG2_IB_OINFI_X(x) (((x) & GENMASK(26, 22)) >> 22) +#define HSIO_S6G_IB_CFG2_IB_TAUX(x) (((x) << 19) & GENMASK(21, 19)) +#define HSIO_S6G_IB_CFG2_IB_TAUX_M GENMASK(21, 19) +#define HSIO_S6G_IB_CFG2_IB_TAUX_X(x) (((x) & GENMASK(21, 19)) >> 19) +#define HSIO_S6G_IB_CFG2_IB_OINFS(x) (((x) << 16) & GENMASK(18, 16)) +#define HSIO_S6G_IB_CFG2_IB_OINFS_M GENMASK(18, 16) +#define HSIO_S6G_IB_CFG2_IB_OINFS_X(x) (((x) & GENMASK(18, 16)) >> 16) +#define HSIO_S6G_IB_CFG2_IB_OCALS(x) (((x) << 10) & GENMASK(15, 10)) +#define HSIO_S6G_IB_CFG2_IB_OCALS_M GENMASK(15, 10) +#define HSIO_S6G_IB_CFG2_IB_OCALS_X(x) (((x) & GENMASK(15, 10)) >> 10) +#define HSIO_S6G_IB_CFG2_IB_TCALV(x) (((x) << 5) & GENMASK(9, 5)) +#define HSIO_S6G_IB_CFG2_IB_TCALV_M GENMASK(9, 5) +#define HSIO_S6G_IB_CFG2_IB_TCALV_X(x) (((x) & GENMASK(9, 5)) >> 5) +#define HSIO_S6G_IB_CFG2_IB_UMAX(x) (((x) << 3) & GENMASK(4, 3)) +#define HSIO_S6G_IB_CFG2_IB_UMAX_M GENMASK(4, 3) +#define HSIO_S6G_IB_CFG2_IB_UMAX_X(x) (((x) & GENMASK(4, 3)) >> 3) +#define HSIO_S6G_IB_CFG2_IB_UREG(x) ((x) & GENMASK(2, 0)) +#define HSIO_S6G_IB_CFG2_IB_UREG_M GENMASK(2, 0) + +#define HSIO_S6G_IB_CFG3_IB_INI_HP(x) (((x) << 18) & GENMASK(23, 18)) +#define HSIO_S6G_IB_CFG3_IB_INI_HP_M GENMASK(23, 18) +#define HSIO_S6G_IB_CFG3_IB_INI_HP_X(x) (((x) & GENMASK(23, 18)) >> 18) +#define HSIO_S6G_IB_CFG3_IB_INI_MID(x) (((x) << 12) & GENMASK(17, 12)) +#define HSIO_S6G_IB_CFG3_IB_INI_MID_M GENMASK(17, 12) +#define HSIO_S6G_IB_CFG3_IB_INI_MID_X(x) (((x) & GENMASK(17, 12)) >> 12) +#define HSIO_S6G_IB_CFG3_IB_INI_LP(x) (((x) << 6) & GENMASK(11, 6)) +#define HSIO_S6G_IB_CFG3_IB_INI_LP_M GENMASK(11, 6) +#define HSIO_S6G_IB_CFG3_IB_INI_LP_X(x) (((x) & GENMASK(11, 6)) >> 6) +#define HSIO_S6G_IB_CFG3_IB_INI_OFFSET(x) ((x) & GENMASK(5, 0)) +#define HSIO_S6G_IB_CFG3_IB_INI_OFFSET_M GENMASK(5, 0) + +#define HSIO_S6G_IB_CFG4_IB_MAX_HP(x) (((x) << 18) & GENMASK(23, 18)) +#define HSIO_S6G_IB_CFG4_IB_MAX_HP_M GENMASK(23, 18) +#define HSIO_S6G_IB_CFG4_IB_MAX_HP_X(x) (((x) & GENMASK(23, 18)) >> 18) +#define HSIO_S6G_IB_CFG4_IB_MAX_MID(x) (((x) << 12) & GENMASK(17, 12)) +#define HSIO_S6G_IB_CFG4_IB_MAX_MID_M GENMASK(17, 12) +#define HSIO_S6G_IB_CFG4_IB_MAX_MID_X(x) (((x) & GENMASK(17, 12)) >> 12) +#define HSIO_S6G_IB_CFG4_IB_MAX_LP(x) (((x) << 6) & GENMASK(11, 6)) +#define HSIO_S6G_IB_CFG4_IB_MAX_LP_M GENMASK(11, 6) +#define HSIO_S6G_IB_CFG4_IB_MAX_LP_X(x) (((x) & GENMASK(11, 6)) >> 6) +#define HSIO_S6G_IB_CFG4_IB_MAX_OFFSET(x) ((x) & GENMASK(5, 0)) +#define HSIO_S6G_IB_CFG4_IB_MAX_OFFSET_M GENMASK(5, 0) + +#define HSIO_S6G_IB_CFG5_IB_MIN_HP(x) (((x) << 18) & GENMASK(23, 18)) +#define HSIO_S6G_IB_CFG5_IB_MIN_HP_M GENMASK(23, 18) +#define HSIO_S6G_IB_CFG5_IB_MIN_HP_X(x) (((x) & GENMASK(23, 18)) >> 18) +#define HSIO_S6G_IB_CFG5_IB_MIN_MID(x) (((x) << 12) & GENMASK(17, 12)) +#define HSIO_S6G_IB_CFG5_IB_MIN_MID_M GENMASK(17, 12) +#define HSIO_S6G_IB_CFG5_IB_MIN_MID_X(x) (((x) & GENMASK(17, 12)) >> 12) +#define HSIO_S6G_IB_CFG5_IB_MIN_LP(x) (((x) << 6) & GENMASK(11, 6)) +#define HSIO_S6G_IB_CFG5_IB_MIN_LP_M GENMASK(11, 6) +#define HSIO_S6G_IB_CFG5_IB_MIN_LP_X(x) (((x) & GENMASK(11, 6)) >> 6) +#define HSIO_S6G_IB_CFG5_IB_MIN_OFFSET(x) ((x) & GENMASK(5, 0)) +#define HSIO_S6G_IB_CFG5_IB_MIN_OFFSET_M GENMASK(5, 0) + +#define HSIO_S6G_OB_CFG_OB_IDLE BIT(31) +#define HSIO_S6G_OB_CFG_OB_ENA1V_MODE BIT(30) +#define HSIO_S6G_OB_CFG_OB_POL BIT(29) +#define HSIO_S6G_OB_CFG_OB_POST0(x) (((x) << 23) & GENMASK(28, 23)) +#define HSIO_S6G_OB_CFG_OB_POST0_M GENMASK(28, 23) +#define HSIO_S6G_OB_CFG_OB_POST0_X(x) (((x) & GENMASK(28, 23)) >> 23) +#define HSIO_S6G_OB_CFG_OB_PREC(x) (((x) << 18) & GENMASK(22, 18)) +#define HSIO_S6G_OB_CFG_OB_PREC_M GENMASK(22, 18) +#define HSIO_S6G_OB_CFG_OB_PREC_X(x) (((x) & GENMASK(22, 18)) >> 18) +#define HSIO_S6G_OB_CFG_OB_R_ADJ_MUX BIT(17) +#define HSIO_S6G_OB_CFG_OB_R_ADJ_PDR BIT(16) +#define HSIO_S6G_OB_CFG_OB_POST1(x) (((x) << 11) & GENMASK(15, 11)) +#define HSIO_S6G_OB_CFG_OB_POST1_M GENMASK(15, 11) +#define HSIO_S6G_OB_CFG_OB_POST1_X(x) (((x) & GENMASK(15, 11)) >> 11) +#define HSIO_S6G_OB_CFG_OB_R_COR BIT(10) +#define HSIO_S6G_OB_CFG_OB_SEL_RCTRL BIT(9) +#define HSIO_S6G_OB_CFG_OB_SR_H BIT(8) +#define HSIO_S6G_OB_CFG_OB_SR(x) (((x) << 4) & GENMASK(7, 4)) +#define HSIO_S6G_OB_CFG_OB_SR_M GENMASK(7, 4) +#define HSIO_S6G_OB_CFG_OB_SR_X(x) (((x) & GENMASK(7, 4)) >> 4) +#define HSIO_S6G_OB_CFG_OB_RESISTOR_CTRL(x) ((x) & GENMASK(3, 0)) +#define HSIO_S6G_OB_CFG_OB_RESISTOR_CTRL_M GENMASK(3, 0) + +#define HSIO_S6G_OB_CFG1_OB_ENA_CAS(x) (((x) << 6) & GENMASK(8, 6)) +#define HSIO_S6G_OB_CFG1_OB_ENA_CAS_M GENMASK(8, 6) +#define HSIO_S6G_OB_CFG1_OB_ENA_CAS_X(x) (((x) & GENMASK(8, 6)) >> 6) +#define HSIO_S6G_OB_CFG1_OB_LEV(x) ((x) & GENMASK(5, 0)) +#define HSIO_S6G_OB_CFG1_OB_LEV_M GENMASK(5, 0) + +#define HSIO_S6G_SER_CFG_SER_4TAP_ENA BIT(8) +#define HSIO_S6G_SER_CFG_SER_CPMD_SEL BIT(7) +#define HSIO_S6G_SER_CFG_SER_SWAP_CPMD BIT(6) +#define HSIO_S6G_SER_CFG_SER_ALISEL(x) (((x) << 4) & GENMASK(5, 4)) +#define HSIO_S6G_SER_CFG_SER_ALISEL_M GENMASK(5, 4) +#define HSIO_S6G_SER_CFG_SER_ALISEL_X(x) (((x) & GENMASK(5, 4)) >> 4) +#define HSIO_S6G_SER_CFG_SER_ENHYS BIT(3) +#define HSIO_S6G_SER_CFG_SER_BIG_WIN BIT(2) +#define HSIO_S6G_SER_CFG_SER_EN_WIN BIT(1) +#define HSIO_S6G_SER_CFG_SER_ENALI BIT(0) + +#define HSIO_S6G_COMMON_CFG_SYS_RST BIT(17) +#define HSIO_S6G_COMMON_CFG_SE_DIV2_ENA BIT(16) +#define HSIO_S6G_COMMON_CFG_SE_AUTO_SQUELCH_ENA BIT(15) +#define HSIO_S6G_COMMON_CFG_ENA_LANE BIT(14) +#define HSIO_S6G_COMMON_CFG_PWD_RX BIT(13) +#define HSIO_S6G_COMMON_CFG_PWD_TX BIT(12) +#define HSIO_S6G_COMMON_CFG_LANE_CTRL(x) (((x) << 9) & GENMASK(11, 9)) +#define HSIO_S6G_COMMON_CFG_LANE_CTRL_M GENMASK(11, 9) +#define HSIO_S6G_COMMON_CFG_LANE_CTRL_X(x) (((x) & GENMASK(11, 9)) >> 9) +#define HSIO_S6G_COMMON_CFG_ENA_DIRECT BIT(8) +#define HSIO_S6G_COMMON_CFG_ENA_ELOOP BIT(7) +#define HSIO_S6G_COMMON_CFG_ENA_FLOOP BIT(6) +#define HSIO_S6G_COMMON_CFG_ENA_ILOOP BIT(5) +#define HSIO_S6G_COMMON_CFG_ENA_PLOOP BIT(4) +#define HSIO_S6G_COMMON_CFG_HRATE BIT(3) +#define HSIO_S6G_COMMON_CFG_QRATE BIT(2) +#define HSIO_S6G_COMMON_CFG_IF_MODE(x) ((x) & GENMASK(1, 0)) +#define HSIO_S6G_COMMON_CFG_IF_MODE_M GENMASK(1, 0) + +#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS(x) (((x) << 16) & GENMASK(17, 16)) +#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS_M GENMASK(17, 16) +#define HSIO_S6G_PLL_CFG_PLL_ENA_OFFS_X(x) (((x) & GENMASK(17, 16)) >> 16) +#define HSIO_S6G_PLL_CFG_PLL_DIV4 BIT(15) +#define HSIO_S6G_PLL_CFG_PLL_ENA_ROT BIT(14) +#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA(x) (((x) << 6) & GENMASK(13, 6)) +#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA_M GENMASK(13, 6) +#define HSIO_S6G_PLL_CFG_PLL_FSM_CTRL_DATA_X(x) (((x) & GENMASK(13, 6)) >> 6) +#define HSIO_S6G_PLL_CFG_PLL_FSM_ENA BIT(5) +#define HSIO_S6G_PLL_CFG_PLL_FSM_FORCE_SET_ENA BIT(4) +#define HSIO_S6G_PLL_CFG_PLL_FSM_OOR_RECAL_ENA BIT(3) +#define HSIO_S6G_PLL_CFG_PLL_RB_DATA_SEL BIT(2) +#define HSIO_S6G_PLL_CFG_PLL_ROT_DIR BIT(1) +#define HSIO_S6G_PLL_CFG_PLL_ROT_FRQ BIT(0) + +#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_DATA_N BIT(5) +#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_DATA_P BIT(4) +#define HSIO_S6G_ACJTAG_CFG_ACJTAG_INIT_CLK BIT(3) +#define HSIO_S6G_ACJTAG_CFG_OB_DIRECT BIT(2) +#define HSIO_S6G_ACJTAG_CFG_ACJTAG_ENA BIT(1) +#define HSIO_S6G_ACJTAG_CFG_JTAG_CTRL_ENA BIT(0) + +#define HSIO_S6G_GP_CFG_GP_MSB(x) (((x) << 16) & GENMASK(31, 16)) +#define HSIO_S6G_GP_CFG_GP_MSB_M GENMASK(31, 16) +#define HSIO_S6G_GP_CFG_GP_MSB_X(x) (((x) & GENMASK(31, 16)) >> 16) +#define HSIO_S6G_GP_CFG_GP_LSB(x) ((x) & GENMASK(15, 0)) +#define HSIO_S6G_GP_CFG_GP_LSB_M GENMASK(15, 0) + +#define HSIO_S6G_IB_STATUS0_IB_CAL_DONE BIT(8) +#define HSIO_S6G_IB_STATUS0_IB_HP_GAIN_ACT BIT(7) +#define HSIO_S6G_IB_STATUS0_IB_MID_GAIN_ACT BIT(6) +#define HSIO_S6G_IB_STATUS0_IB_LP_GAIN_ACT BIT(5) +#define HSIO_S6G_IB_STATUS0_IB_OFFSET_ACT BIT(4) +#define HSIO_S6G_IB_STATUS0_IB_OFFSET_VLD BIT(3) +#define HSIO_S6G_IB_STATUS0_IB_OFFSET_ERR BIT(2) +#define HSIO_S6G_IB_STATUS0_IB_OFFSDIR BIT(1) +#define HSIO_S6G_IB_STATUS0_IB_SIG_DET BIT(0) + +#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT(x) (((x) << 18) & GENMASK(23, 18)) +#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT_M GENMASK(23, 18) +#define HSIO_S6G_IB_STATUS1_IB_HP_GAIN_STAT_X(x) (((x) & GENMASK(23, 18)) >> 18) +#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT(x) (((x) << 12) & GENMASK(17, 12)) +#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT_M GENMASK(17, 12) +#define HSIO_S6G_IB_STATUS1_IB_MID_GAIN_STAT_X(x) (((x) & GENMASK(17, 12)) >> 12) +#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT(x) (((x) << 6) & GENMASK(11, 6)) +#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT_M GENMASK(11, 6) +#define HSIO_S6G_IB_STATUS1_IB_LP_GAIN_STAT_X(x) (((x) & GENMASK(11, 6)) >> 6) +#define HSIO_S6G_IB_STATUS1_IB_OFFSET_STAT(x) ((x) & GENMASK(5, 0)) +#define HSIO_S6G_IB_STATUS1_IB_OFFSET_STAT_M GENMASK(5, 0) + +#define HSIO_S6G_ACJTAG_STATUS_ACJTAG_CAPT_DATA_N BIT(2) +#define HSIO_S6G_ACJTAG_STATUS_ACJTAG_CAPT_DATA_P BIT(1) +#define HSIO_S6G_ACJTAG_STATUS_IB_DIRECT BIT(0) + +#define HSIO_S6G_PLL_STATUS_PLL_CAL_NOT_DONE BIT(10) +#define HSIO_S6G_PLL_STATUS_PLL_CAL_ERR BIT(9) +#define HSIO_S6G_PLL_STATUS_PLL_OUT_OF_RANGE_ERR BIT(8) +#define HSIO_S6G_PLL_STATUS_PLL_RB_DATA(x) ((x) & GENMASK(7, 0)) +#define HSIO_S6G_PLL_STATUS_PLL_RB_DATA_M GENMASK(7, 0) + +#define HSIO_S6G_REVID_SERDES_REV(x) (((x) << 26) & GENMASK(31, 26)) +#define HSIO_S6G_REVID_SERDES_REV_M GENMASK(31, 26) +#define HSIO_S6G_REVID_SERDES_REV_X(x) (((x) & GENMASK(31, 26)) >> 26) +#define HSIO_S6G_REVID_RCPLL_REV(x) (((x) << 21) & GENMASK(25, 21)) +#define HSIO_S6G_REVID_RCPLL_REV_M GENMASK(25, 21) +#define HSIO_S6G_REVID_RCPLL_REV_X(x) (((x) & GENMASK(25, 21)) >> 21) +#define HSIO_S6G_REVID_SER_REV(x) (((x) << 16) & GENMASK(20, 16)) +#define HSIO_S6G_REVID_SER_REV_M GENMASK(20, 16) +#define HSIO_S6G_REVID_SER_REV_X(x) (((x) & GENMASK(20, 16)) >> 16) +#define HSIO_S6G_REVID_DES_REV(x) (((x) << 10) & GENMASK(15, 10)) +#define HSIO_S6G_REVID_DES_REV_M GENMASK(15, 10) +#define HSIO_S6G_REVID_DES_REV_X(x) (((x) & GENMASK(15, 10)) >> 10) +#define HSIO_S6G_REVID_OB_REV(x) (((x) << 5) & GENMASK(9, 5)) +#define HSIO_S6G_REVID_OB_REV_M GENMASK(9, 5) +#define HSIO_S6G_REVID_OB_REV_X(x) (((x) & GENMASK(9, 5)) >> 5) +#define HSIO_S6G_REVID_IB_REV(x) ((x) & GENMASK(4, 0)) +#define HSIO_S6G_REVID_IB_REV_M GENMASK(4, 0) + +#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_WR_ONE_SHOT BIT(31) +#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_RD_ONE_SHOT BIT(30) +#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_ADDR(x) ((x) & GENMASK(24, 0)) +#define HSIO_MCB_S6G_ADDR_CFG_SERDES6G_ADDR_M GENMASK(24, 0) + +#define HSIO_HW_CFG_DEV2G5_10_MODE BIT(6) +#define HSIO_HW_CFG_DEV1G_9_MODE BIT(5) +#define HSIO_HW_CFG_DEV1G_6_MODE BIT(4) +#define HSIO_HW_CFG_DEV1G_5_MODE BIT(3) +#define HSIO_HW_CFG_DEV1G_4_MODE BIT(2) +#define HSIO_HW_CFG_PCIE_ENA BIT(1) +#define HSIO_HW_CFG_QSGMII_ENA BIT(0) + +#define HSIO_HW_QSGMII_CFG_SHYST_DIS BIT(3) +#define HSIO_HW_QSGMII_CFG_E_DET_ENA BIT(2) +#define HSIO_HW_QSGMII_CFG_USE_I1_ENA BIT(1) +#define HSIO_HW_QSGMII_CFG_FLIP_LANES BIT(0) + +#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS(x) (((x) << 1) & GENMASK(6, 1)) +#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS_M GENMASK(6, 1) +#define HSIO_HW_QSGMII_STAT_DELAY_VAR_X200PS_X(x) (((x) & GENMASK(6, 1)) >> 1) +#define HSIO_HW_QSGMII_STAT_SYNC BIT(0) + +#define HSIO_CLK_CFG_CLKDIV_PHY(x) (((x) << 1) & GENMASK(8, 1)) +#define HSIO_CLK_CFG_CLKDIV_PHY_M GENMASK(8, 1) +#define HSIO_CLK_CFG_CLKDIV_PHY_X(x) (((x) & GENMASK(8, 1)) >> 1) +#define HSIO_CLK_CFG_CLKDIV_PHY_DIS BIT(0) + +#define HSIO_TEMP_SENSOR_CTRL_FORCE_TEMP_RD BIT(5) +#define HSIO_TEMP_SENSOR_CTRL_FORCE_RUN BIT(4) +#define HSIO_TEMP_SENSOR_CTRL_FORCE_NO_RST BIT(3) +#define HSIO_TEMP_SENSOR_CTRL_FORCE_POWER_UP BIT(2) +#define HSIO_TEMP_SENSOR_CTRL_FORCE_CLK BIT(1) +#define HSIO_TEMP_SENSOR_CTRL_SAMPLE_ENA BIT(0) + +#define HSIO_TEMP_SENSOR_CFG_RUN_WID(x) (((x) << 8) & GENMASK(15, 8)) +#define HSIO_TEMP_SENSOR_CFG_RUN_WID_M GENMASK(15, 8) +#define HSIO_TEMP_SENSOR_CFG_RUN_WID_X(x) (((x) & GENMASK(15, 8)) >> 8) +#define HSIO_TEMP_SENSOR_CFG_SAMPLE_PER(x) ((x) & GENMASK(7, 0)) +#define HSIO_TEMP_SENSOR_CFG_SAMPLE_PER_M GENMASK(7, 0) + +#define HSIO_TEMP_SENSOR_STAT_TEMP_VALID BIT(8) +#define HSIO_TEMP_SENSOR_STAT_TEMP(x) ((x) & GENMASK(7, 0)) +#define HSIO_TEMP_SENSOR_STAT_TEMP_M GENMASK(7, 0) + +#endif -- cgit v1.2.3 From 66c213233308ac94fabed80b09041eb14f8d787b Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Thu, 4 Oct 2018 14:22:02 +0200 Subject: net: mscc: ocelot: simplify register access for PLL5 configuration Since HSIO address space can be accessed by different drivers, let's simplify the register address definitions so that it can be easily used by all drivers and put the register address definition in the include/soc/mscc/ocelot_hsio.h header file. Reviewed-by: Florian Fainelli Acked-by: Alexandre Belloni Signed-off-by: Quentin Schulz Signed-off-by: David S. Miller --- include/soc/mscc/ocelot_hsio.h | 74 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'include') diff --git a/include/soc/mscc/ocelot_hsio.h b/include/soc/mscc/ocelot_hsio.h index d93ddec3931b..43112dd7313a 100644 --- a/include/soc/mscc/ocelot_hsio.h +++ b/include/soc/mscc/ocelot_hsio.h @@ -8,6 +8,80 @@ #ifndef _MSCC_OCELOT_HSIO_H_ #define _MSCC_OCELOT_HSIO_H_ +#define HSIO_PLL5G_CFG0 0x0000 +#define HSIO_PLL5G_CFG1 0x0004 +#define HSIO_PLL5G_CFG2 0x0008 +#define HSIO_PLL5G_CFG3 0x000c +#define HSIO_PLL5G_CFG4 0x0010 +#define HSIO_PLL5G_CFG5 0x0014 +#define HSIO_PLL5G_CFG6 0x0018 +#define HSIO_PLL5G_STATUS0 0x001c +#define HSIO_PLL5G_STATUS1 0x0020 +#define HSIO_PLL5G_BIST_CFG0 0x0024 +#define HSIO_PLL5G_BIST_CFG1 0x0028 +#define HSIO_PLL5G_BIST_CFG2 0x002c +#define HSIO_PLL5G_BIST_STAT0 0x0030 +#define HSIO_PLL5G_BIST_STAT1 0x0034 +#define HSIO_RCOMP_CFG0 0x0038 +#define HSIO_RCOMP_STATUS 0x003c +#define HSIO_SYNC_ETH_CFG 0x0040 +#define HSIO_SYNC_ETH_PLL_CFG 0x0048 +#define HSIO_S1G_DES_CFG 0x004c +#define HSIO_S1G_IB_CFG 0x0050 +#define HSIO_S1G_OB_CFG 0x0054 +#define HSIO_S1G_SER_CFG 0x0058 +#define HSIO_S1G_COMMON_CFG 0x005c +#define HSIO_S1G_PLL_CFG 0x0060 +#define HSIO_S1G_PLL_STATUS 0x0064 +#define HSIO_S1G_DFT_CFG0 0x0068 +#define HSIO_S1G_DFT_CFG1 0x006c +#define HSIO_S1G_DFT_CFG2 0x0070 +#define HSIO_S1G_TP_CFG 0x0074 +#define HSIO_S1G_RC_PLL_BIST_CFG 0x0078 +#define HSIO_S1G_MISC_CFG 0x007c +#define HSIO_S1G_DFT_STATUS 0x0080 +#define HSIO_S1G_MISC_STATUS 0x0084 +#define HSIO_MCB_S1G_ADDR_CFG 0x0088 +#define HSIO_S6G_DIG_CFG 0x008c +#define HSIO_S6G_DFT_CFG0 0x0090 +#define HSIO_S6G_DFT_CFG1 0x0094 +#define HSIO_S6G_DFT_CFG2 0x0098 +#define HSIO_S6G_TP_CFG0 0x009c +#define HSIO_S6G_TP_CFG1 0x00a0 +#define HSIO_S6G_RC_PLL_BIST_CFG 0x00a4 +#define HSIO_S6G_MISC_CFG 0x00a8 +#define HSIO_S6G_OB_ANEG_CFG 0x00ac +#define HSIO_S6G_DFT_STATUS 0x00b0 +#define HSIO_S6G_ERR_CNT 0x00b4 +#define HSIO_S6G_MISC_STATUS 0x00b8 +#define HSIO_S6G_DES_CFG 0x00bc +#define HSIO_S6G_IB_CFG 0x00c0 +#define HSIO_S6G_IB_CFG1 0x00c4 +#define HSIO_S6G_IB_CFG2 0x00c8 +#define HSIO_S6G_IB_CFG3 0x00cc +#define HSIO_S6G_IB_CFG4 0x00d0 +#define HSIO_S6G_IB_CFG5 0x00d4 +#define HSIO_S6G_OB_CFG 0x00d8 +#define HSIO_S6G_OB_CFG1 0x00dc +#define HSIO_S6G_SER_CFG 0x00e0 +#define HSIO_S6G_COMMON_CFG 0x00e4 +#define HSIO_S6G_PLL_CFG 0x00e8 +#define HSIO_S6G_ACJTAG_CFG 0x00ec +#define HSIO_S6G_GP_CFG 0x00f0 +#define HSIO_S6G_IB_STATUS0 0x00f4 +#define HSIO_S6G_IB_STATUS1 0x00f8 +#define HSIO_S6G_ACJTAG_STATUS 0x00fc +#define HSIO_S6G_PLL_STATUS 0x0100 +#define HSIO_S6G_REVID 0x0104 +#define HSIO_MCB_S6G_ADDR_CFG 0x0108 +#define HSIO_HW_CFG 0x010c +#define HSIO_HW_QSGMII_CFG 0x0110 +#define HSIO_HW_QSGMII_STAT 0x0114 +#define HSIO_CLK_CFG 0x0118 +#define HSIO_TEMP_SENSOR_CTRL 0x011c +#define HSIO_TEMP_SENSOR_CFG 0x0120 +#define HSIO_TEMP_SENSOR_STAT 0x0124 + #define HSIO_PLL5G_CFG0_ENA_ROT BIT(31) #define HSIO_PLL5G_CFG0_ENA_LANE BIT(30) #define HSIO_PLL5G_CFG0_ENA_CLKTREE BIT(29) -- cgit v1.2.3 From c2a90025ad09d830c8d8ae69f485eac6aaaa2472 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Thu, 4 Oct 2018 14:22:03 +0200 Subject: phy: add QSGMII and PCIE modes Prepare for upcoming phys that'll handle QSGMII or PCIe. Reviewed-by: Florian Fainelli Signed-off-by: Quentin Schulz Signed-off-by: David S. Miller --- include/linux/phy/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 9713aebdd348..03b319f89a34 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -37,9 +37,11 @@ enum phy_mode { PHY_MODE_USB_OTG, PHY_MODE_SGMII, PHY_MODE_2500SGMII, + PHY_MODE_QSGMII, PHY_MODE_10GKR, PHY_MODE_UFS_HS_A, PHY_MODE_UFS_HS_B, + PHY_MODE_PCIE, }; /** -- cgit v1.2.3 From b68fc09be48edbc47de1a0f3d42ef8adf6c0ac55 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Thu, 4 Oct 2018 14:22:06 +0200 Subject: dt-bindings: add constants for Microsemi Ocelot SerDes driver The Microsemi Ocelot has multiple SerDes and requires that the SerDes be muxed accordingly to the hardware representation. Let's add a constant for each SerDes available in the Microsemi Ocelot. Reviewed-by: Rob Herring Signed-off-by: Quentin Schulz Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/dt-bindings/phy/phy-ocelot-serdes.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/dt-bindings/phy/phy-ocelot-serdes.h (limited to 'include') diff --git a/include/dt-bindings/phy/phy-ocelot-serdes.h b/include/dt-bindings/phy/phy-ocelot-serdes.h new file mode 100644 index 000000000000..bd28f21206f6 --- /dev/null +++ b/include/dt-bindings/phy/phy-ocelot-serdes.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Copyright (c) 2018 Microsemi Corporation */ +#ifndef __PHY_OCELOT_SERDES_H__ +#define __PHY_OCELOT_SERDES_H__ + +#define SERDES1G(x) (x) +#define SERDES1G_MAX SERDES1G(5) +#define SERDES6G(x) (SERDES1G_MAX + 1 + (x)) +#define SERDES6G_MAX SERDES6G(2) +#define SERDES_MAX SERDES6G_MAX + +#endif -- cgit v1.2.3 From c941ce9c282cc606e6517356fcc186a9da2b4ab9 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sun, 7 Oct 2018 12:56:47 +0100 Subject: bpf: add verifier callback to get stack usage info for offloaded progs In preparation for BPF-to-BPF calls in offloaded programs, add a new function attribute to the struct bpf_prog_offload_ops so that drivers supporting eBPF offload can hook at the end of program verification, and potentially extract information collected by the verifier. Implement a minimal callback (returning 0) in the drivers providing the structs, namely netdevsim and nfp. This will be useful in the nfp driver, in later commits, to extract the number of subprograms as well as the stack depth for those subprograms. Signed-off-by: Quentin Monnet Reviewed-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + include/linux/bpf_verifier.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 027697b6a22f..9b558713447f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -263,6 +263,7 @@ struct bpf_verifier_ops { struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); + int (*finalize)(struct bpf_verifier_env *env); }; struct bpf_prog_offload { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7b6fd2ab3263..9e8056ec20fa 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -245,5 +245,6 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); +int bpf_prog_offload_finalize(struct bpf_verifier_env *env); #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From 9494a6c2e4f6ce21a1e6885145171f90c4492131 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 8 Oct 2018 12:14:41 +0200 Subject: dt-bindings: net: vsc8531: add two additional LED modes for VSC8584 The VSC8584 (and most likely other PHYs in the same generation) has two additional LED modes that can be picked, so let's add them. Reviewed-by: Rob Herring Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: Quentin Schulz Signed-off-by: David S. Miller --- include/dt-bindings/net/mscc-phy-vsc8531.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/net/mscc-phy-vsc8531.h b/include/dt-bindings/net/mscc-phy-vsc8531.h index 697161f80eb5..9eb2ec2b2ea9 100644 --- a/include/dt-bindings/net/mscc-phy-vsc8531.h +++ b/include/dt-bindings/net/mscc-phy-vsc8531.h @@ -18,9 +18,11 @@ #define VSC8531_LINK_100_1000_ACTIVITY 4 #define VSC8531_LINK_10_1000_ACTIVITY 5 #define VSC8531_LINK_10_100_ACTIVITY 6 +#define VSC8584_LINK_100FX_1000X_ACTIVITY 7 #define VSC8531_DUPLEX_COLLISION 8 #define VSC8531_COLLISION 9 #define VSC8531_ACTIVITY 10 +#define VSC8584_100FX_1000X_ACTIVITY 11 #define VSC8531_AUTONEG_FAULT 12 #define VSC8531_SERIAL_MODE 13 #define VSC8531_FORCE_LED_OFF 14 -- cgit v1.2.3 From 4a19edb60d0203cd5bf95a8b46ea8f63fd41194c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 7 Oct 2018 20:16:22 -0700 Subject: netlink: Pass extack to dump handlers Declare extack in netlink_dump and pass to dump handlers via netlink_callback. Add any extack message after the dump_done_errno allowing error messages to be returned. This will be useful when strict checking is done on dump requests, returning why the dump fails EINVAL. Signed-off-by: David Ahern Acked-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 71f121b66ca8..88c8a2d83eb3 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -176,6 +176,7 @@ struct netlink_callback { void *data; /* the module that dump function belong to */ struct module *module; + struct netlink_ext_ack *extack; u16 family; u16 min_dump_alloc; unsigned int prev_seq, seq; -- cgit v1.2.3 From 3d0d4337d7a105c5e8ba85c6e7b75437b4c6745e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 7 Oct 2018 20:16:23 -0700 Subject: netlink: Add extack message to nlmsg_parse for invalid header length Give a user a reason why EINVAL is returned in nlmsg_parse. Signed-off-by: David Ahern Acked-by: Christian Brauner Signed-off-by: David S. Miller --- include/net/netlink.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 589683091f16..9522a0bf1f3a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -516,8 +516,10 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, const struct nla_policy *policy, struct netlink_ext_ack *extack) { - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { + NL_SET_ERR_MSG(extack, "Invalid header length"); return -EINVAL; + } return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), policy, extack); -- cgit v1.2.3 From a5f6cba291654168e6ab73c3e7ff5b27371c4cb9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 7 Oct 2018 20:16:25 -0700 Subject: netlink: Add strict version of nlmsg_parse and nla_parse nla_parse is currently lenient on message parsing, allowing type to be 0 or greater than max expected and only logging a message "netlink: %d bytes leftover after parsing attributes in process `%s'." if the netlink message has unknown data at the end after parsing. What this could mean is that the header at the front of the attributes is actually wrong and the parsing is shifted from what is expected. Add a new strict version that actually fails with EINVAL if there are any bytes remaining after the parsing loop completes, if the atttrbitue type is 0 or greater than max expected. Signed-off-by: David Ahern Acked-by: Christian Brauner Signed-off-by: David S. Miller --- include/net/netlink.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 9522a0bf1f3a..f1db8e594847 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -373,6 +373,9 @@ int nla_validate(const struct nlattr *head, int len, int maxtype, int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, struct netlink_ext_ack *extack); +int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, + int len, const struct nla_policy *policy, + struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); @@ -525,6 +528,20 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, nlmsg_attrlen(nlh, hdrlen), policy, extack); } +static inline int nlmsg_parse_strict(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { + NL_SET_ERR_MSG(extack, "Invalid header length"); + return -EINVAL; + } + + return nla_parse_strict(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy, extack); +} + /** * nlmsg_find_attr - find a specific attribute in a netlink message * @nlh: netlink message header -- cgit v1.2.3 From 89d35528d17d25819a755a2b52931e911baebc66 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 7 Oct 2018 20:16:27 -0700 Subject: netlink: Add new socket option to enable strict checking on dumps Add a new socket option, NETLINK_DUMP_STRICT_CHK, that userspace can use via setsockopt to request strict checking of headers and attributes on dump requests. To get dump features such as kernel side filtering based on data in the header or attributes appended to the dump request, userspace must call setsockopt() for NETLINK_DUMP_STRICT_CHK and a non-zero value. Since the netlink sock and its flags are private to the af_netlink code, the strict checking flag is passed to dump handlers via a flag in the netlink_callback struct. For old userspace on new kernel there is no impact as all of the data checks in later patches are wrapped in a check on the new strict flag. For new userspace on old kernel, the setsockopt will fail and even if new userspace sets data in the headers and appended attributes the kernel will silently ignore it. Moving forward when the setsockopt succeeds, the new userspace on old kernel means the dump request can pass an attribute the kernel does not understand. The dump will then fail as the older kernel does not understand it. New userspace on new kernel setting the socket option gets the benefit of the improved data dump. Kernel side the NETLINK_DUMP_STRICT_CHK uapi is converted to a generic NETLINK_F_STRICT_CHK flag which can potentially be leveraged for tighter checking on the NEW, DEL, and SET commands. Signed-off-by: David Ahern Acked-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + include/uapi/linux/netlink.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 88c8a2d83eb3..72580f1a72a2 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -179,6 +179,7 @@ struct netlink_callback { struct netlink_ext_ack *extack; u16 family; u16 min_dump_alloc; + bool strict_check; unsigned int prev_seq, seq; long args[6]; }; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 776bc92e9118..486ed1f0c0bc 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -155,6 +155,7 @@ enum nlmsgerr_attrs { #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 #define NETLINK_EXT_ACK 11 +#define NETLINK_DUMP_STRICT_CHK 12 struct nl_pktinfo { __u32 group; -- cgit v1.2.3 From e8ba330ac0c55004e775eab53fa1e748e5d71bdb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 7 Oct 2018 20:16:35 -0700 Subject: rtnetlink: Update fib dumps for strict data checking Add helper to check netlink message for route dumps. If the strict flag is set the dump request is expected to have an rtmsg struct as the header. All elements of the struct are expected to be 0 with the exception of rtm_flags (which is used by both ipv4 and ipv6 dumps) and no attributes can be appended. rtm_flags can only have RTM_F_CLONED and RTM_F_PREFIX set. Update inet_dump_fib, inet6_dump_fib, mpls_dump_routes, ipmr_rtm_dumproute, and ip6mr_rtm_dumproute to call this helper if strict data checking is enabled. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f7c109e37298..9846b79c9ee1 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -452,4 +452,6 @@ static inline void fib_proc_exit(struct net *net) u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); +int ip_valid_fib_dump_req(const struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); #endif /* _NET_FIB_H */ -- cgit v1.2.3 From f6a8a19bb11b46d60250ddc4e3e1ba6aa166f488 Mon Sep 17 00:00:00 2001 From: Denis Drozdov Date: Tue, 14 Aug 2018 14:08:51 +0300 Subject: RDMA/netdev: Hoist alloc_netdev_mqs out of the driver netdev has several interfaces that expect to call alloc_netdev_mqs from the core code, with the driver only providing the arguments. This is incompatible with the rdma_netdev interface that returns the netdev directly. Thus re-organize the API used by ipoib so that the verbs core code calls alloc_netdev_mqs for the driver. This is done by allowing the drivers to provide the allocation parameters via a 'get_params' callback and then initializing an allocated netdev as a second step. Fixes: cd565b4b51e5 ("IB/IPoIB: Support acceleration options callbacks") Signed-off-by: Jason Gunthorpe Signed-off-by: Denis Drozdov Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 14 ++++---------- include/rdma/ib_verbs.h | 23 +++++++++++++++++++++-- 2 files changed, 25 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 26a92462f4ce..4b75796cac23 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1228,21 +1228,15 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); -#ifndef CONFIG_MLX5_CORE_IPOIB -static inline -struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, - struct ib_device *ibdev, - const char *name, - void (*setup)(struct net_device *)) -{ - return ERR_PTR(-EOPNOTSUPP); -} -#else +#ifdef CONFIG_MLX5_CORE_IPOIB struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, struct ib_device *ibdev, const char *name, void (*setup)(struct net_device *)); #endif /* CONFIG_MLX5_CORE_IPOIB */ +int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, + struct ib_device *device, + struct rdma_netdev_alloc_params *params); struct mlx5_profile { u64 mask; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e950c2a68f06..020216cee8f1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2223,6 +2223,16 @@ struct rdma_netdev { union ib_gid *gid, u16 mlid); }; +struct rdma_netdev_alloc_params { + size_t sizeof_priv; + unsigned int txqs; + unsigned int rxqs; + void *param; + + int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num, + struct net_device *netdev, void *param); +}; + struct ib_port_pkey_list { /* Lock to hold while modifying the list. */ spinlock_t list_lock; @@ -2523,8 +2533,8 @@ struct ib_device { /** * rdma netdev operation * - * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it - * doesn't support the specified rdma netdev type. + * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params + * must return -EOPNOTSUPP if it doesn't support the specified type. */ struct net_device *(*alloc_rdma_netdev)( struct ib_device *device, @@ -2534,6 +2544,10 @@ struct ib_device { unsigned char name_assign_type, void (*setup)(struct net_device *)); + int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, + enum rdma_netdev_t type, + struct rdma_netdev_alloc_params *params); + struct module *owner; struct device dev; struct kobject *ports_parent; @@ -4179,4 +4193,9 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile); int uverbs_destroy_def_handler(struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs); + +struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, + enum rdma_netdev_t type, const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)); #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 5d6b0cb3369df425de75c94c98eb3f1a86659022 Mon Sep 17 00:00:00 2001 From: Denis Drozdov Date: Tue, 14 Aug 2018 14:22:35 +0300 Subject: RDMA/netdev: Fix netlink support in IPoIB IPoIB netlink support was broken by the below commit since integrating the rdma_netdev support relies on an allocation flow for netdevs that was controlled by the ipoib driver while netdev's rtnl_newlink implementation assumes that the netdev will be allocated by netlink. Such situation leads to crash in __ipoib_device_add, once trying to reuse netlink device. This patch fixes the kernel oops for both mlx4 and mlx5 devices triggered by the following command: Fixes: cd565b4b51e5 ("IB/IPoIB: Support acceleration options callbacks") Signed-off-by: Denis Drozdov Signed-off-by: Jason Gunthorpe Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- include/rdma/ib_verbs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 020216cee8f1..0ed5d913a492 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4198,4 +4198,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); + +int rdma_init_netdev(struct ib_device *device, u8 port_num, + enum rdma_netdev_t type, const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *), + struct net_device *netdev); + #endif /* IB_VERBS_H */ -- cgit v1.2.3 From ed792e28c4bd09e9a319d2ad914aa62982cb4c4a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 8 Oct 2018 14:06:34 -0700 Subject: net/ipv6: Make ipv6_route_table_template static ipv6_route_table_template is exported but there are no users outside of route.c. Make it static. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ff33f498c137..829650540780 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1089,8 +1089,6 @@ static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; } #endif #ifdef CONFIG_SYSCTL -extern struct ctl_table ipv6_route_table_template[]; - struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); struct ctl_table *ipv6_route_sysctl_init(struct net *net); int ipv6_sysctl_register(void); -- cgit v1.2.3 From f458e832ba510f843807fc2c2906a8fb59554c9f Mon Sep 17 00:00:00 2001 From: Chaitanya T K Date: Sat, 6 Oct 2018 19:34:59 +0200 Subject: mac80211: minstrel: Enable STBC and LDPC for VHT Rates If peer support reception of STBC and LDPC, enable them for better performance. Signed-off-by: Chaitanya TK Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c4809ad8ab46..0ef67f837ae1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1670,6 +1670,7 @@ struct ieee80211_mu_edca_param_set { #define IEEE80211_VHT_CAP_RXSTBC_3 0x00000300 #define IEEE80211_VHT_CAP_RXSTBC_4 0x00000400 #define IEEE80211_VHT_CAP_RXSTBC_MASK 0x00000700 +#define IEEE80211_VHT_CAP_RXSTBC_SHIFT 8 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE 0x00000800 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE 0x00001000 #define IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT 13 -- cgit v1.2.3 From bc847970f43281cb07c9f7d0897ee08cd1e08cf3 Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Chitrapu Date: Wed, 3 Oct 2018 20:19:20 -0700 Subject: mac80211: support FTM responder configuration/statistics New bss param ftm_responder is used to notify the driver to enable fine timing request (FTM) responder role in AP mode. Plumb the new cfg80211 API for FTM responder statistics through to the driver API in mac80211. Signed-off-by: David Spinadel Signed-off-by: Johannes Berg Signed-off-by: Pradeep Kumar Chitrapu Signed-off-by: Johannes Berg --- include/net/mac80211.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c4fadbafbf21..2ccd4d1bef89 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -309,6 +309,8 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_KEEP_ALIVE: keep alive options (idle period or protected * keep alive) changed. * @BSS_CHANGED_MCAST_RATE: Multicast Rate setting changed for this interface + * @BSS_CHANGED_FTM_RESPONDER: fime timing reasurement request responder + * functionality changed for this BSS (AP mode). * */ enum ieee80211_bss_change { @@ -338,6 +340,7 @@ enum ieee80211_bss_change { BSS_CHANGED_MU_GROUPS = 1<<23, BSS_CHANGED_KEEP_ALIVE = 1<<24, BSS_CHANGED_MCAST_RATE = 1<<25, + BSS_CHANGED_FTM_RESPONDER = 1<<26, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -463,6 +466,21 @@ struct ieee80211_mu_group_data { u8 position[WLAN_USER_POSITION_LEN]; }; +/** + * ieee80211_ftm_responder_params - FTM responder parameters + * + * @lci: LCI subelement content + * @civicloc: CIVIC location subelement content + * @lci_len: LCI data length + * @civicloc_len: Civic data length + */ +struct ieee80211_ftm_responder_params { + const u8 *lci; + const u8 *civicloc; + size_t lci_len; + size_t civicloc_len; +}; + /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -562,6 +580,9 @@ struct ieee80211_mu_group_data { * @protected_keep_alive: if set, indicates that the station should send an RSN * protected frame to the AP to reset the idle timer at the AP for the * station. + * @ftm_responder: whether to enable or disable fine timing measurement FTM + * responder functionality. + * @ftmr_params: configurable lci/civic parameter when enabling FTM responder. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -612,6 +633,8 @@ struct ieee80211_bss_conf { bool allow_p2p_go_ps; u16 max_idle_period; bool protected_keep_alive; + bool ftm_responder; + struct ieee80211_ftm_responder_params *ftmr_params; }; /** @@ -3598,6 +3621,8 @@ enum ieee80211_reconfig_type { * aggregating two specific frames in the same A-MSDU. The relation * between the skbs should be symmetric and transitive. Note that while * skb is always a real frame, head may or may not be an A-MSDU. + * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available. + * Statistics should be cumulative, currently no way to reset is provided. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3883,6 +3908,9 @@ struct ieee80211_ops { bool (*can_aggregate_in_amsdu)(struct ieee80211_hw *hw, struct sk_buff *head, struct sk_buff *skb); + int (*get_ftm_responder_stats)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_ftm_responder_stats *ftm_stats); }; /** -- cgit v1.2.3 From 0d4e14a32dcab9c4bd559d02874120fbb86b1322 Mon Sep 17 00:00:00 2001 From: Ankita Bajaj Date: Thu, 27 Sep 2018 18:01:57 +0300 Subject: nl80211: Add per peer statistics to compute FCS error rate Add support for drivers to report the total number of MPDUs received and the number of MPDUs received with an FCS error from a specific peer. These counters will be incremented only when the TA of the frame matches the MAC address of the peer irrespective of FCS error. It should be noted that the TA field in the frame might be corrupted when there is an FCS error and TA matching logic would fail in such cases. Hence, FCS error counter might not be fully accurate, but it can provide help in detecting bad RX links in significant number of cases. This FCS error counter without full accuracy can be used, e.g., to trigger a kick-out of a connected client with a bad link in AP mode to force such a client to roam to another AP. Signed-off-by: Ankita Bajaj Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 +++++++ include/uapi/linux/nl80211.h | 8 ++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0e16e723dcef..1fa41b7a1be3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1303,6 +1303,10 @@ struct cfg80211_tid_stats { * @ack_signal: signal strength (in dBm) of the last ACK frame. * @avg_ack_signal: average rssi value of ack packet for the no of msdu's has * been sent. + * @rx_mpdu_count: number of MPDUs received from this station + * @fcs_err_count: number of packets (MPDUs) received from this station with + * an FCS error. This counter should be incremented only when TA of the + * received packet with an FCS error matches the peer MAC address. */ struct station_info { u64 filled; @@ -1349,6 +1353,9 @@ struct station_info { struct cfg80211_tid_stats *pertid; s8 ack_signal; s8 avg_ack_signal; + + u32 rx_mpdu_count; + u32 fcs_err_count; }; #if IS_ENABLED(CONFIG_CFG80211) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index dc6d5a1ef470..6d610bae30a9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3068,6 +3068,12 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm) * @NL80211_STA_INFO_ACK_SIGNAL_AVG: avg signal strength of ACK frames (s8, dBm) + * @NL80211_STA_INFO_RX_MPDUS: total number of received packets (MPDUs) + * (u32, from this station) + * @NL80211_STA_INFO_FCS_ERROR_COUNT: total number of packets (MPDUs) received + * with an FCS error (u32, from this station). This count may not include + * some packets with an FCS error due to TA corruption. Hence this counter + * might not be fully accurate. * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -3108,6 +3114,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_PAD, NL80211_STA_INFO_ACK_SIGNAL, NL80211_STA_INFO_ACK_SIGNAL_AVG, + NL80211_STA_INFO_RX_MPDUS, + NL80211_STA_INFO_FCS_ERROR_COUNT, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, -- cgit v1.2.3 From f8252e7b5a83deee0e477fc1e31e3f06ceb35d28 Mon Sep 17 00:00:00 2001 From: Anilkumar Kolli Date: Thu, 11 Oct 2018 18:15:03 +0530 Subject: mac80211: implement ieee80211_tx_rate_update to update rate Current mac80211 has provision to update tx status through ieee80211_tx_status() and ieee80211_tx_status_ext(). But drivers like ath10k updates the tx status from the skb except txrate, txrate will be updated from a different path, peer stats. Using ieee80211_tx_status_ext() in two different paths (one for the stats, one for the tx rate) would duplicate the stats instead. To avoid this stats duplication, ieee80211_tx_rate_update() is implemented. Signed-off-by: Anilkumar Kolli [minor commit message editing, use initializers in code] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2ccd4d1bef89..71985e95d2d9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4379,6 +4379,21 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, u32 thr); +/** + * ieee80211_tx_rate_update - transmit rate update callback + * + * Drivers should call this functions with a non-NULL pub sta + * This function can be used in drivers that does not have provision + * in updating the tx rate in data path. + * + * @hw: the hardware the frame was transmitted by + * @pubsta: the station to update the tx rate for. + * @info: tx status information + */ +void ieee80211_tx_rate_update(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct ieee80211_tx_info *info); + /** * ieee80211_tx_status - transmit status callback * -- cgit v1.2.3 From 7c6bb7d2faaf1ed7d78bafd712476e4cf2cf0d7d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 11 Oct 2018 20:17:21 -0700 Subject: net/ipv6: Add knob to skip DELROUTE message on device down Another difference between IPv4 and IPv6 is the generation of RTM_DELROUTE notifications when a device is taken down (admin down) or deleted. IPv4 does not generate a message for routes evicted by the down or delete; IPv6 does. A NOS at scale really needs to avoid these messages and have IPv4 and IPv6 behave similarly, relying on userspace to handle link notifications and evict the routes. At this point existing user behavior needs to be preserved. Since notifications are a global action (not per app) the only way to preserve existing behavior and allow the messages to be skipped is to add a new sysctl (net/ipv6/route/skip_notify_on_dev_down) which can be set to disable the notificatioons. IPv6 route code already supports the option to skip the message (it is used for multipath routes for example). Besides the new sysctl we need to pass the skip_notify setting through the generic fib6_clean and fib6_walk functions to fib6_clean_node and to set skip_notify on calls to __ip_del_rt for the addrconf_ifdown path. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 +++ include/net/netns/ipv6.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index f06e968f1992..caabfd84a098 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -407,6 +407,9 @@ struct fib6_node *fib6_locate(struct fib6_node *root, void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); +void fib6_clean_all_skip_notify(struct net *net, + int (*func)(struct fib6_info *, void *arg), + void *arg); int fib6_add(struct fib6_node *root, struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index f0e396ab9bec..ef1ed529f33c 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -45,6 +45,7 @@ struct netns_sysctl_ipv6 { int max_dst_opts_len; int max_hbh_opts_len; int seg6_flowlabel; + bool skip_notify_on_dev_down; }; struct netns_ipv6 { -- cgit v1.2.3 From 859bd2ef1fc1110a8031b967ee656c53a6260a76 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 11 Oct 2018 20:33:49 -0700 Subject: net: Evict neighbor entries on carrier down When a link's carrier goes down it could be a sign of the port changing networks. If the new network has overlapping addresses with the old one, then the kernel will continue trying to use neighbor entries established based on the old network until the entries finally age out - meaning a potentially long delay with communications not working. This patch evicts neighbor entries on carrier down with the exception of those marked permanent. Permanent entries are managed by userspace (either an admin or a routing daemon such as FRR). Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 0874f7fcd859..f58b384aa6c9 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -323,6 +323,7 @@ void __neigh_set_probe_once(struct neighbour *neigh); bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); +int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb); int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb); -- cgit v1.2.3 From 9163a0fc1f0c0980f117cc25f4fa6ba9b0750a36 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 12 Oct 2018 13:41:16 +0300 Subject: net: bridge: add support for per-port vlan stats This patch adds an option to have per-port vlan stats instead of the default global stats. The option can be set only when there are no port vlans in the bridge since we need to allocate the stats if it is set when vlans are being added to ports (and respectively free them when being deleted). Also bump RTNL_MAX_TYPE as the bridge is the largest user of options. The current stats design allows us to add these without any changes to the fast-path, it all comes down to the per-vlan stats pointer which, if this option is enabled, will be allocated for each port vlan instead of using the global bridge-wide one. CC: bridge@lists.linux-foundation.org CC: Roopa Prabhu Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 58faab897201..1debfa42cba1 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -287,6 +287,7 @@ enum { IFLA_BR_MCAST_STATS_ENABLED, IFLA_BR_MCAST_IGMP_VERSION, IFLA_BR_MCAST_MLD_VERSION, + IFLA_BR_VLAN_STATS_PER_PORT, __IFLA_BR_MAX, }; -- cgit v1.2.3 From 5886d932e52acfbe12ea5aac8e7c3ad6f16364d1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Oct 2018 12:53:00 +0200 Subject: netlink: replace __NLA_ENSURE implementation We already have BUILD_BUG_ON_ZERO() which I just hadn't found before, so we should use it here instead of open-coding another implementation thereof. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index f1db8e594847..4c1e99303b5a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -311,7 +311,7 @@ struct nla_policy { #define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \ { .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr } -#define __NLA_ENSURE(condition) (sizeof(char[1 - 2*!(condition)]) - 1) +#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) #define NLA_ENSURE_INT_TYPE(tp) \ (__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 || \ tp == NLA_S16 || tp == NLA_U16 || \ -- cgit v1.2.3 From cc53aabcc283c36274d3f3ce9adc4b40c21d4838 Mon Sep 17 00:00:00 2001 From: Govind Singh Date: Thu, 11 Oct 2018 13:16:01 +0300 Subject: firmware: qcom: scm: Add WLAN VMID for Qualcomm SCM interface Add WLAN related VMID's to support wlan driver to set up the remote's permissions call via TrustZone. Signed-off-by: Govind Singh Reviewed-by: Bjorn Andersson Acked-by: Niklas Cassel Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- include/linux/qcom_scm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 5d65521260b3..06996ad4f2bc 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. +/* Copyright (c) 2010-2015, 2018, The Linux Foundation. All rights reserved. * Copyright (C) 2015 Linaro Ltd. * * This program is free software; you can redistribute it and/or modify @@ -33,6 +33,8 @@ struct qcom_scm_vmperm { #define QCOM_SCM_VMID_HLOS 0x3 #define QCOM_SCM_VMID_MSS_MSA 0xF +#define QCOM_SCM_VMID_WLAN 0x18 +#define QCOM_SCM_VMID_WLAN_CE 0x19 #define QCOM_SCM_PERM_READ 0x4 #define QCOM_SCM_PERM_WRITE 0x2 #define QCOM_SCM_PERM_EXEC 0x1 -- cgit v1.2.3 From 571f739083e2544b343b5998608de679519de4e9 Mon Sep 17 00:00:00 2001 From: Mallikarjun Phulari Date: Fri, 5 Oct 2018 14:48:12 +0530 Subject: Bluetooth: Use separate L2CAP LE credit based connection result values Add the result values specific to L2CAP LE credit based connections and change the old result values wherever they were used. Signed-off-by: Mallikarjun Phulari Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 3555440e14fc..ea4b4ec85b78 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -277,12 +277,17 @@ struct l2cap_conn_rsp { #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 #define L2CAP_CR_BAD_AMP 0x0005 -#define L2CAP_CR_AUTHENTICATION 0x0005 -#define L2CAP_CR_AUTHORIZATION 0x0006 -#define L2CAP_CR_BAD_KEY_SIZE 0x0007 -#define L2CAP_CR_ENCRYPTION 0x0008 -#define L2CAP_CR_INVALID_SCID 0x0009 -#define L2CAP_CR_SCID_IN_USE 0x000A + +/* credit based connect results */ +#define L2CAP_CR_LE_SUCCESS 0x0000 +#define L2CAP_CR_LE_BAD_PSM 0x0002 +#define L2CAP_CR_LE_NO_MEM 0x0004 +#define L2CAP_CR_LE_AUTHENTICATION 0x0005 +#define L2CAP_CR_LE_AUTHORIZATION 0x0006 +#define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 +#define L2CAP_CR_LE_ENCRYPTION 0x0008 +#define L2CAP_CR_LE_INVALID_SCID 0x0009 +#define L2CAP_CR_LE_SCID_IN_USE 0X000A /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 -- cgit v1.2.3 From dd1a8f8a88eecbc903f9ffff12332bec6d3f3be3 Mon Sep 17 00:00:00 2001 From: Mallikarjun Phulari Date: Fri, 5 Oct 2018 14:48:13 +0530 Subject: Bluetooth: Errata Service Release 8, Erratum 3253 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L2CAP: New result values 0x0006 - Connection refused – Invalid Source CID 0x0007 - Connection refused – Source CID already allocated As per the ESR08_V1.0.0, 1.11.2 Erratum 3253, Page No. 54, "Remote CID invalid Issue". Applies to Core Specification versions: V5.0, V4.2, v4.1, v4.0, and v3.0 + HS Vol 3, Part A, Section 4.2, 4.3, 4.14, 4.15. Core Specification Version 5.0, Page No.1753, Table 4.6 and Page No. 1767, Table 4.14 New result values are added to l2cap connect/create channel response as 0x0006 - Connection refused – Invalid Source CID 0x0007 - Connection refused – Source CID already allocated Signed-off-by: Mallikarjun Phulari Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index ea4b4ec85b78..093aedebdf0c 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -277,6 +277,8 @@ struct l2cap_conn_rsp { #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 #define L2CAP_CR_BAD_AMP 0x0005 +#define L2CAP_CR_INVALID_SCID 0x0006 +#define L2CAP_CR_SCID_IN_USE 0x0007 /* credit based connect results */ #define L2CAP_CR_LE_SUCCESS 0x0000 -- cgit v1.2.3 From 1243a51f6c05ecbb2c5c9e02fdcc1e7a06f76f26 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 13 Oct 2018 02:45:57 +0200 Subject: tcp, ulp: remove ulp bits from sockmap In order to prepare sockmap logic to be used in combination with kTLS we need to detangle it from ULP, and further split it in later commits into a generic API. Joint work with John. Signed-off-by: Daniel Borkmann Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0d2929223c70..8f5cef67fd35 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2057,7 +2057,6 @@ struct tcp_ulp_ops { int tcp_register_ulp(struct tcp_ulp_ops *type); void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); -int tcp_set_ulp_id(struct sock *sk, const int ulp); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); -- cgit v1.2.3 From 604326b41a6fb9b4a78b6179335decee0365cd8c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 13 Oct 2018 02:45:58 +0200 Subject: bpf, sockmap: convert to generic sk_msg interface Add a generic sk_msg layer, and convert current sockmap and later kTLS over to make use of it. While sk_buff handles network packet representation from netdevice up to socket, sk_msg handles data representation from application to socket layer. This means that sk_msg framework spans across ULP users in the kernel, and enables features such as introspection or filtering of data with the help of BPF programs that operate on this data structure. Latter becomes in particular useful for kTLS where data encryption is deferred into the kernel, and as such enabling the kernel to perform L7 introspection and policy based on BPF for TLS connections where the record is being encrypted after BPF has run and came to a verdict. In order to get there, first step is to transform open coding of scatter-gather list handling into a common core framework that subsystems can use. The code itself has been split and refactored into three bigger pieces: i) the generic sk_msg API which deals with managing the scatter gather ring, providing helpers for walking and mangling, transferring application data from user space into it, and preparing it for BPF pre/post-processing, ii) the plain sock map itself where sockets can be attached to or detached from; these bits are independent of i) which can now be used also without sock map, and iii) the integration with plain TCP as one protocol to be used for processing L7 application data (later this could e.g. also be extended to other protocols like UDP). The semantics are the same with the old sock map code and therefore no change of user facing behavior or APIs. While pursuing this work it also helped finding a number of bugs in the old sockmap code that we've fixed already in earlier commits. The test_sockmap kselftest suite passes through fine as well. Joint work with John. Signed-off-by: Daniel Borkmann Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 33 ++--- include/linux/bpf_types.h | 2 +- include/linux/filter.h | 21 --- include/linux/skmsg.h | 371 ++++++++++++++++++++++++++++++++++++++++++++++ include/net/tcp.h | 27 ++++ 5 files changed, 410 insertions(+), 44 deletions(-) create mode 100644 include/linux/skmsg.h (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9b558713447f..e60fff48288b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -737,33 +737,18 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ -#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET) -struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); -struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key); -int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); -int sockmap_get_from_fd(const union bpf_attr *attr, int type, - struct bpf_prog *prog); +#if defined(CONFIG_BPF_STREAM_PARSER) +int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); +int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); #else -static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) -{ - return NULL; -} - -static inline struct sock *__sock_hash_lookup_elem(struct bpf_map *map, - void *key) -{ - return NULL; -} - -static inline int sock_map_prog(struct bpf_map *map, - struct bpf_prog *prog, - u32 type) +static inline int sock_map_prog_update(struct bpf_map *map, + struct bpf_prog *prog, u32 which) { return -EOPNOTSUPP; } -static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type, - struct bpf_prog *prog) +static inline int sock_map_get_from_fd(const union bpf_attr *attr, + struct bpf_prog *prog) { return -EINVAL; } @@ -839,6 +824,10 @@ extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; +extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; +extern const struct bpf_func_proto bpf_msg_redirect_map_proto; +extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; +extern const struct bpf_func_proto bpf_sk_redirect_map_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 5432f4c9f50e..fa48343a5ea1 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -57,7 +57,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) -#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET) +#if defined(CONFIG_BPF_STREAM_PARSER) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 6791a0ac0139..5771874bc01e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -520,24 +520,6 @@ struct bpf_skb_data_end { void *data_end; }; -struct sk_msg_buff { - void *data; - void *data_end; - __u32 apply_bytes; - __u32 cork_bytes; - int sg_copybreak; - int sg_start; - int sg_curr; - int sg_end; - struct scatterlist sg_data[MAX_SKB_FRAGS]; - bool sg_copy[MAX_SKB_FRAGS]; - __u32 flags; - struct sock *sk_redir; - struct sock *sk; - struct sk_buff *skb; - struct list_head list; -}; - struct bpf_redirect_info { u32 ifindex; u32 flags; @@ -833,9 +815,6 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); -struct sock *do_sk_redirect_map(struct sk_buff *skb); -struct sock *do_msg_redirect_map(struct sk_msg_buff *md); - #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, struct bpf_prog *prog, struct sk_buff *skb, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h new file mode 100644 index 000000000000..95678103c4a0 --- /dev/null +++ b/include/linux/skmsg.h @@ -0,0 +1,371 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */ + +#ifndef _LINUX_SKMSG_H +#define _LINUX_SKMSG_H + +#include +#include +#include +#include + +#include +#include +#include + +#define MAX_MSG_FRAGS MAX_SKB_FRAGS + +enum __sk_action { + __SK_DROP = 0, + __SK_PASS, + __SK_REDIRECT, + __SK_NONE, +}; + +struct sk_msg_sg { + u32 start; + u32 curr; + u32 end; + u32 size; + u32 copybreak; + bool copy[MAX_MSG_FRAGS]; + struct scatterlist data[MAX_MSG_FRAGS]; +}; + +struct sk_msg { + struct sk_msg_sg sg; + void *data; + void *data_end; + u32 apply_bytes; + u32 cork_bytes; + u32 flags; + struct sk_buff *skb; + struct sock *sk_redir; + struct sock *sk; + struct list_head list; +}; + +struct sk_psock_progs { + struct bpf_prog *msg_parser; + struct bpf_prog *skb_parser; + struct bpf_prog *skb_verdict; +}; + +enum sk_psock_state_bits { + SK_PSOCK_TX_ENABLED, +}; + +struct sk_psock_link { + struct list_head list; + struct bpf_map *map; + void *link_raw; +}; + +struct sk_psock_parser { + struct strparser strp; + bool enabled; + void (*saved_data_ready)(struct sock *sk); +}; + +struct sk_psock_work_state { + struct sk_buff *skb; + u32 len; + u32 off; +}; + +struct sk_psock { + struct sock *sk; + struct sock *sk_redir; + u32 apply_bytes; + u32 cork_bytes; + u32 eval; + struct sk_msg *cork; + struct sk_psock_progs progs; + struct sk_psock_parser parser; + struct sk_buff_head ingress_skb; + struct list_head ingress_msg; + unsigned long state; + struct list_head link; + spinlock_t link_lock; + refcount_t refcnt; + void (*saved_unhash)(struct sock *sk); + void (*saved_close)(struct sock *sk, long timeout); + void (*saved_write_space)(struct sock *sk); + struct proto *sk_proto; + struct sk_psock_work_state work_state; + struct work_struct work; + union { + struct rcu_head rcu; + struct work_struct gc; + }; +}; + +int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, + int elem_first_coalesce); +void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len); +int sk_msg_free(struct sock *sk, struct sk_msg *msg); +int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg); +void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes); +void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg, + u32 bytes); + +void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes); + +int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, + struct sk_msg *msg, u32 bytes); +int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, + struct sk_msg *msg, u32 bytes); + +static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) +{ + WARN_ON(i == msg->sg.end && bytes); +} + +static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes) +{ + if (psock->apply_bytes) { + if (psock->apply_bytes < bytes) + psock->apply_bytes = 0; + else + psock->apply_bytes -= bytes; + } +} + +#define sk_msg_iter_var_prev(var) \ + do { \ + if (var == 0) \ + var = MAX_MSG_FRAGS - 1; \ + else \ + var--; \ + } while (0) + +#define sk_msg_iter_var_next(var) \ + do { \ + var++; \ + if (var == MAX_MSG_FRAGS) \ + var = 0; \ + } while (0) + +#define sk_msg_iter_prev(msg, which) \ + sk_msg_iter_var_prev(msg->sg.which) + +#define sk_msg_iter_next(msg, which) \ + sk_msg_iter_var_next(msg->sg.which) + +static inline void sk_msg_clear_meta(struct sk_msg *msg) +{ + memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy)); +} + +static inline void sk_msg_init(struct sk_msg *msg) +{ + memset(msg, 0, sizeof(*msg)); + sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data)); +} + +static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, + int which, u32 size) +{ + dst->sg.data[which] = src->sg.data[which]; + dst->sg.data[which].length = size; + src->sg.data[which].length -= size; + src->sg.data[which].offset += size; +} + +static inline u32 sk_msg_elem_used(const struct sk_msg *msg) +{ + return msg->sg.end >= msg->sg.start ? + msg->sg.end - msg->sg.start : + msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); +} + +static inline bool sk_msg_full(const struct sk_msg *msg) +{ + return (msg->sg.end == msg->sg.start) && msg->sg.size; +} + +static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) +{ + return &msg->sg.data[which]; +} + +static inline struct page *sk_msg_page(struct sk_msg *msg, int which) +{ + return sg_page(sk_msg_elem(msg, which)); +} + +static inline bool sk_msg_to_ingress(const struct sk_msg *msg) +{ + return msg->flags & BPF_F_INGRESS; +} + +static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) +{ + struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); + + if (msg->sg.copy[msg->sg.start]) { + msg->data = NULL; + msg->data_end = NULL; + } else { + msg->data = sg_virt(sge); + msg->data_end = msg->data + sge->length; + } +} + +static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, + u32 len, u32 offset) +{ + struct scatterlist *sge; + + get_page(page); + sge = sk_msg_elem(msg, msg->sg.end); + sg_set_page(sge, page, len, offset); + sg_unmark_end(sge); + + msg->sg.copy[msg->sg.end] = true; + msg->sg.size += len; + sk_msg_iter_next(msg, end); +} + +static inline struct sk_psock *sk_psock(const struct sock *sk) +{ + return rcu_dereference_sk_user_data(sk); +} + +static inline bool sk_has_psock(struct sock *sk) +{ + return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg; +} + +static inline void sk_psock_queue_msg(struct sk_psock *psock, + struct sk_msg *msg) +{ + list_add_tail(&msg->list, &psock->ingress_msg); +} + +static inline void sk_psock_report_error(struct sk_psock *psock, int err) +{ + struct sock *sk = psock->sk; + + sk->sk_err = err; + sk->sk_error_report(sk); +} + +struct sk_psock *sk_psock_init(struct sock *sk, int node); + +int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); +void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); +void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); + +int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, + struct sk_msg *msg); + +static inline struct sk_psock_link *sk_psock_init_link(void) +{ + return kzalloc(sizeof(struct sk_psock_link), + GFP_ATOMIC | __GFP_NOWARN); +} + +static inline void sk_psock_free_link(struct sk_psock_link *link) +{ + kfree(link); +} + +struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); +#if defined(CONFIG_BPF_STREAM_PARSER) +void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link); +#else +static inline void sk_psock_unlink(struct sock *sk, + struct sk_psock_link *link) +{ +} +#endif + +void __sk_psock_purge_ingress_msg(struct sk_psock *psock); + +static inline void sk_psock_cork_free(struct sk_psock *psock) +{ + if (psock->cork) { + sk_msg_free(psock->sk, psock->cork); + kfree(psock->cork); + psock->cork = NULL; + } +} + +static inline void sk_psock_update_proto(struct sock *sk, + struct sk_psock *psock, + struct proto *ops) +{ + psock->saved_unhash = sk->sk_prot->unhash; + psock->saved_close = sk->sk_prot->close; + psock->saved_write_space = sk->sk_write_space; + + psock->sk_proto = sk->sk_prot; + sk->sk_prot = ops; +} + +static inline void sk_psock_restore_proto(struct sock *sk, + struct sk_psock *psock) +{ + if (psock->sk_proto) { + sk->sk_prot = psock->sk_proto; + psock->sk_proto = NULL; + } +} + +static inline void sk_psock_set_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + set_bit(bit, &psock->state); +} + +static inline void sk_psock_clear_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + clear_bit(bit, &psock->state); +} + +static inline bool sk_psock_test_state(const struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + return test_bit(bit, &psock->state); +} + +static inline struct sk_psock *sk_psock_get(struct sock *sk) +{ + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (psock && !refcount_inc_not_zero(&psock->refcnt)) + psock = NULL; + rcu_read_unlock(); + return psock; +} + +void sk_psock_stop(struct sock *sk, struct sk_psock *psock); +void sk_psock_destroy(struct rcu_head *rcu); +void sk_psock_drop(struct sock *sk, struct sk_psock *psock); + +static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) +{ + if (refcount_dec_and_test(&psock->refcnt)) + sk_psock_drop(sk, psock); +} + +static inline void psock_set_prog(struct bpf_prog **pprog, + struct bpf_prog *prog) +{ + prog = xchg(pprog, prog); + if (prog) + bpf_prog_put(prog); +} + +static inline void psock_progs_drop(struct sk_psock_progs *progs) +{ + psock_set_prog(&progs->msg_parser, NULL); + psock_set_prog(&progs->skb_parser, NULL); + psock_set_prog(&progs->skb_verdict, NULL); +} + +#endif /* _LINUX_SKMSG_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 8f5cef67fd35..3600ae0f25c3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -858,6 +858,21 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb); } +static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS; +} + +static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->bpf.sk_redir; +} + +static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->bpf.sk_redir = NULL; +} + #if IS_ENABLED(CONFIG_IPV6) /* This is the variant of inet6_iif() that must be used by TCP, * as TCP moves IP6CB into a different location in skb->cb[] @@ -2064,6 +2079,18 @@ void tcp_cleanup_ulp(struct sock *sk); __MODULE_INFO(alias, alias_userspace, name); \ __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) +struct sk_msg; +struct sk_psock; + +int tcp_bpf_init(struct sock *sk); +void tcp_bpf_reinit(struct sock *sk); +int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, + int flags); +int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, int *addr_len); +int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, + struct msghdr *msg, int len); + /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF * program does not support the chosen operation or there is no BPF -- cgit v1.2.3 From d829e9c4112b52f4f00195900fd4c685f61365ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 13 Oct 2018 02:45:59 +0200 Subject: tls: convert to generic sk_msg interface Convert kTLS over to make use of sk_msg interface for plaintext and encrypted scattergather data, so it reuses all the sk_msg helpers and data structure which later on in a second step enables to glue this to BPF. This also allows to remove quite a bit of open coded helpers which are covered by the sk_msg API. Recent changes in kTLs 80ece6a03aaf ("tls: Remove redundant vars from tls record structure") and 4e6d47206c32 ("tls: Add support for inplace records encryption") changed the data path handling a bit; while we've kept the latter optimization intact, we had to undo the former change to better fit the sk_msg model, hence the sg_aead_in and sg_aead_out have been brought back and are linked into the sk_msg sgs. Now the kTLS record contains a msg_plaintext and msg_encrypted sk_msg each. In the original code, the zerocopy_from_iter() has been used out of TX but also RX path. For the strparser skb-based RX path, we've left the zerocopy_from_iter() in decrypt_internal() mostly untouched, meaning it has been moved into tls_setup_from_iter() with charging logic removed (as not used from RX). Given RX path is not based on sk_msg objects, we haven't pursued setting up a dummy sk_msg to call into sk_msg_zerocopy_from_iter(), but it could be an option to prusue in a later step. Joint work with John. Signed-off-by: Daniel Borkmann Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- include/linux/skmsg.h | 2 ++ include/net/sock.h | 4 ---- include/net/tls.h | 18 +++++++++--------- 3 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 95678103c4a0..4e84b3c2eff8 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -102,6 +102,8 @@ struct sk_psock { int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, int elem_first_coalesce); +int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, + u32 off, u32 len); void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len); int sk_msg_free(struct sock *sk, struct sk_msg *msg); int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg); diff --git a/include/net/sock.h b/include/net/sock.h index 751549ac0a84..7470c45d182d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2214,10 +2214,6 @@ static inline struct page_frag *sk_page_frag(struct sock *sk) bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); -int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, - int sg_start, int *sg_curr, unsigned int *sg_size, - int first_coalesce); - /* * Default write policy as shown to user space via poll/select/SIGIO */ diff --git a/include/net/tls.h b/include/net/tls.h index 5e853835597e..3d22d8a59be7 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -39,6 +39,8 @@ #include #include #include +#include + #include #include #include @@ -103,15 +105,13 @@ struct tls_rec { int tx_flags; int inplace_crypto; - /* AAD | sg_plaintext_data | sg_tag */ - struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS + 1]; - /* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */ - struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS + 1]; + struct sk_msg msg_plaintext; + struct sk_msg msg_encrypted; - unsigned int sg_plaintext_size; - unsigned int sg_encrypted_size; - int sg_plaintext_num_elem; - int sg_encrypted_num_elem; + /* AAD | msg_plaintext.sg.data | sg_tag */ + struct scatterlist sg_aead_in[2]; + /* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */ + struct scatterlist sg_aead_out[2]; char aad_space[TLS_AAD_SPACE_SIZE]; struct aead_request aead_req; @@ -223,8 +223,8 @@ struct tls_context { unsigned long flags; bool in_tcp_sendpages; + bool pending_open_record_frags; - u16 pending_open_record_frags; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); -- cgit v1.2.3 From 924ad65ed01ee0eec5d2a3280c01c394343d6df7 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 13 Oct 2018 02:46:00 +0200 Subject: tls: replace poll implementation with read hook Instead of re-implementing poll routine use the poll callback to trigger read from kTLS, we reuse the stream_memory_read callback which is simpler and achieves the same. This helps to align sockmap and kTLS so we can more easily embed BPF in kTLS. Joint work with Daniel. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- include/net/tls.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 3d22d8a59be7..bab5627ff5e3 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -142,8 +142,7 @@ struct tls_sw_context_rx { struct strparser strp; void (*saved_data_ready)(struct sock *sk); - unsigned int (*sk_poll)(struct file *file, struct socket *sock, - struct poll_table_struct *wait); + struct sk_buff *recv_pkt; u8 control; bool decrypted; @@ -272,8 +271,7 @@ void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -unsigned int tls_sw_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); +bool tls_sw_stream_read(const struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -- cgit v1.2.3 From d3b18ad31f93d0b6bae105c679018a1ba7daa9ca Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 13 Oct 2018 02:46:01 +0200 Subject: tls: add bpf support to sk_msg handling This work adds BPF sk_msg verdict program support to kTLS allowing BPF and kTLS to be combined together. Previously kTLS and sk_msg verdict programs were mutually exclusive in the ULP layer which created challenges for the orchestrator when trying to apply TCP based policy, for example. To resolve this, leveraging the work from previous patches that consolidates the use of sk_msg, we can finally enable BPF sk_msg verdict programs so they continue to run after the kTLS socket is created. No change in behavior when kTLS is not used in combination with BPF, the kselftest suite for kTLS also runs successfully. Joint work with Daniel. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- include/linux/skmsg.h | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 4e84b3c2eff8..0b919f0bc6d6 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -29,7 +29,11 @@ struct sk_msg_sg { u32 size; u32 copybreak; bool copy[MAX_MSG_FRAGS]; - struct scatterlist data[MAX_MSG_FRAGS]; + /* The extra element is used for chaining the front and sections when + * the list becomes partitioned (e.g. end < start). The crypto APIs + * require the chaining. + */ + struct scatterlist data[MAX_MSG_FRAGS + 1]; }; struct sk_msg { @@ -112,6 +116,7 @@ void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg, u32 bytes); void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes); +void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes); int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); @@ -161,8 +166,9 @@ static inline void sk_msg_clear_meta(struct sk_msg *msg) static inline void sk_msg_init(struct sk_msg *msg) { + BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != MAX_MSG_FRAGS); memset(msg, 0, sizeof(*msg)); - sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data)); + sg_init_marker(msg->sg.data, MAX_MSG_FRAGS); } static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, @@ -174,6 +180,12 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, src->sg.data[which].offset += size; } +static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) +{ + memcpy(dst, src, sizeof(*src)); + sk_msg_init(src); +} + static inline u32 sk_msg_elem_used(const struct sk_msg *msg) { return msg->sg.end >= msg->sg.start ? @@ -229,6 +241,26 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sk_msg_iter_next(msg, end); } +static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) +{ + do { + msg->sg.copy[i] = copy_state; + sk_msg_iter_var_next(i); + if (i == msg->sg.end) + break; + } while (1); +} + +static inline void sk_msg_sg_copy_set(struct sk_msg *msg, u32 start) +{ + sk_msg_sg_copy(msg, start, true); +} + +static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start) +{ + sk_msg_sg_copy(msg, start, false); +} + static inline struct sk_psock *sk_psock(const struct sock *sk) { return rcu_dereference_sk_user_data(sk); @@ -245,6 +277,11 @@ static inline void sk_psock_queue_msg(struct sk_psock *psock, list_add_tail(&msg->list, &psock->ingress_msg); } +static inline bool sk_psock_queue_empty(const struct sk_psock *psock) +{ + return psock ? list_empty(&psock->ingress_msg) : true; +} + static inline void sk_psock_report_error(struct sk_psock *psock, int err) { struct sock *sk = psock->sk; -- cgit v1.2.3 From 8a615c6b0352a9ec56151b6c95d68e0a2eef5cf0 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 15 Oct 2018 10:27:45 -0700 Subject: bpf: Allow sk_lookup with IPv6 module This is a more complete fix than d71019b54bff ("net: core: Fix build with CONFIG_IPV6=m"), so that IPv6 sockets may be looked up if the IPv6 module is loaded (not just if it's compiled in). Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov --- include/net/addrconf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6def0351bcc3..14b789a123e7 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -265,6 +265,11 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly; struct ipv6_bpf_stub { int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, bool force_bind_address_no_port, bool with_lock); + struct sock *(*udp6_lib_lookup)(struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *tbl, + struct sk_buff *skb); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; -- cgit v1.2.3 From 61414f5ec9834df8aa4f55c90de16b71a3d6ca8d Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 9 Oct 2018 23:57:43 +0100 Subject: FDDI: defza: Add support for DEC FDDIcontroller 700 TURBOchannel adapter Add support for the DEC FDDIcontroller 700 (DEFZA), Digital Equipment Corporation's first-generation FDDI network interface adapter, made for TURBOchannel and based on a discrete version of what eventually became Motorola's widely used CAMEL chipset. The CAMEL chipset is present for example in the DEC FDDIcontroller TURBOchannel, EISA and PCI adapters (DEFTA/DEFEA/DEFPA) that we support with the `defxx' driver, however the host bus interface logic and the firmware API are different in the DEFZA and hence a separate driver is required. There isn't much to say about the driver except that it works, but there is one peculiarity to mention. The adapter implements two Tx/Rx queue pairs. Of these one pair is the usual network Tx/Rx queue pair, in this case used by the adapter to exchange frames with the ring, via the RMC (Ring Memory Controller) chip. The Tx queue is handled directly by the RMC chip and resides in onboard packet memory. The Rx queue is maintained via DMA in host memory by adapter's firmware copying received data stored by the RMC in onboard packet memory. The other pair is used to communicate SMT frames with adapter's firmware. Any SMT frame received from the RMC via the Rx queue must be queued back by the driver to the SMT Rx queue for the firmware to process. Similarly the firmware uses the SMT Tx queue to supply the driver with SMT frames that must be queued back to the Tx queue for the RMC to send to the ring. This solution was chosen because the designers ran out of PCB space and could not squeeze in more logic onto the board that would be required to handle this SMT frame traffic without the need to involve the driver, as with the later DEFTA/DEFEA/DEFPA adapters. Finally the driver does some Frame Control byte decoding, so to avoid magic numbers some macros are added to . Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- include/uapi/linux/if_fddi.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_fddi.h b/include/uapi/linux/if_fddi.h index 75eed8b62823..7239aa9c0766 100644 --- a/include/uapi/linux/if_fddi.h +++ b/include/uapi/linux/if_fddi.h @@ -6,9 +6,10 @@ * * Global definitions for the ANSI FDDI interface. * - * Version: @(#)if_fddi.h 1.0.2 Sep 29 2004 + * Version: @(#)if_fddi.h 1.0.3 Oct 6 2018 * - * Author: Lawrence V. Stefani, + * Author: Lawrence V. Stefani, + * Maintainer: Maciej W. Rozycki, * * if_fddi.h is based on previous if_ether.h and if_tr.h work by * Fred N. van Kempen, @@ -45,7 +46,21 @@ #define FDDI_K_OUI_LEN 3 /* Octets in OUI in 802.2 SNAP header */ -/* Define FDDI Frame Control (FC) Byte values */ +/* Define FDDI Frame Control (FC) Byte masks */ +#define FDDI_FC_K_CLASS_MASK 0x80 /* class bit */ +#define FDDI_FC_K_CLASS_SYNC 0x80 +#define FDDI_FC_K_CLASS_ASYNC 0x00 +#define FDDI_FC_K_ALEN_MASK 0x40 /* address length bit */ +#define FDDI_FC_K_ALEN_48 0x40 +#define FDDI_FC_K_ALEN_16 0x00 +#define FDDI_FC_K_FORMAT_MASK 0x30 /* format bits */ +#define FDDI_FC_K_FORMAT_FUTURE 0x30 +#define FDDI_FC_K_FORMAT_IMPLEMENTOR 0x20 +#define FDDI_FC_K_FORMAT_LLC 0x10 +#define FDDI_FC_K_FORMAT_MANAGEMENT 0x00 +#define FDDI_FC_K_CONTROL_MASK 0x0f /* control bits */ + +/* Define FDDI Frame Control (FC) Byte specific values */ #define FDDI_FC_K_VOID 0x00 #define FDDI_FC_K_NON_RESTRICTED_TOKEN 0x80 #define FDDI_FC_K_RESTRICTED_TOKEN 0xC0 -- cgit v1.2.3 From 9f9a742db40f95f4dc20fc7293de4ea6ddb24e47 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 9 Oct 2018 23:57:49 +0100 Subject: FDDI: defza: Support capturing outgoing SMT traffic DEC FDDIcontroller 700 (DEFZA) uses a Tx/Rx queue pair to communicate SMT frames with adapter's firmware. Any SMT frame received from the RMC via the Rx queue is queued back by the driver to the SMT Rx queue for the firmware to process. Similarly the firmware uses the SMT Tx queue to supply the driver with SMT frames which are queued back to the Tx queue for the RMC to send to the ring. When a network tap is attached to an FDDI interface handled by `defza' any incoming SMT frames captured are queued to our usual processing of network data received, which in turn delivers them to any listening taps. However the outgoing SMT frames produced by the firmware bypass our network protocol stack and are therefore not delivered to taps. This in turn means that taps are missing a part of network traffic sent by the adapter, which may make it more difficult to track down network problems or do general traffic analysis. Call `dev_queue_xmit_nit' then in the SMT Tx path, having checked that a network tap is attached, with a newly-created `dev_nit_active' helper wrapping the usual condition used in the transmit path. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 22e4ef7bb701..dc1d9ed33b31 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3645,6 +3645,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, return 0; } +bool dev_nit_active(struct net_device *dev); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; -- cgit v1.2.3 From 9771b8ccdfa6dcb1ac5128ca7fe8649f3092d392 Mon Sep 17 00:00:00 2001 From: "Justin.Lee1@Dell.com" Date: Thu, 11 Oct 2018 18:07:37 +0000 Subject: net/ncsi: Extend NC-SI Netlink interface to allow user space to send NC-SI command The new command (NCSI_CMD_SEND_CMD) is added to allow user space application to send NC-SI command to the network card. Also, add a new attribute (NCSI_ATTR_DATA) for transferring request and response. The work flow is as below. Request: User space application -> Netlink interface (msg) -> new Netlink handler - ncsi_send_cmd_nl() -> ncsi_xmit_cmd() Response: Response received - ncsi_rcv_rsp() -> internal response handler - ncsi_rsp_handler_xxx() -> ncsi_rsp_handler_netlink() -> ncsi_send_netlink_rsp () -> Netlink interface (msg) -> user space application Command timeout - ncsi_request_timeout() -> ncsi_send_netlink_timeout () -> Netlink interface (msg with zero data length) -> user space application Error: Error detected -> ncsi_send_netlink_err () -> Netlink interface (err msg) -> user space application Signed-off-by: Justin Lee Reviewed-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/uapi/linux/ncsi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h index 4c292ecbb748..0a26a5576645 100644 --- a/include/uapi/linux/ncsi.h +++ b/include/uapi/linux/ncsi.h @@ -23,6 +23,9 @@ * optionally the preferred NCSI_ATTR_CHANNEL_ID. * @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination. * Requires NCSI_ATTR_IFINDEX. + * @NCSI_CMD_SEND_CMD: send NC-SI command to network card. + * Requires NCSI_ATTR_IFINDEX, NCSI_ATTR_PACKAGE_ID + * and NCSI_ATTR_CHANNEL_ID. * @NCSI_CMD_MAX: highest command number */ enum ncsi_nl_commands { @@ -30,6 +33,7 @@ enum ncsi_nl_commands { NCSI_CMD_PKG_INFO, NCSI_CMD_SET_INTERFACE, NCSI_CMD_CLEAR_INTERFACE, + NCSI_CMD_SEND_CMD, __NCSI_CMD_AFTER_LAST, NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1 @@ -43,6 +47,7 @@ enum ncsi_nl_commands { * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes * @NCSI_ATTR_PACKAGE_ID: package ID * @NCSI_ATTR_CHANNEL_ID: channel ID + * @NCSI_ATTR_DATA: command payload * @NCSI_ATTR_MAX: highest attribute number */ enum ncsi_nl_attrs { @@ -51,6 +56,7 @@ enum ncsi_nl_attrs { NCSI_ATTR_PACKAGE_LIST, NCSI_ATTR_PACKAGE_ID, NCSI_ATTR_CHANNEL_ID, + NCSI_ATTR_DATA, __NCSI_ATTR_AFTER_LAST, NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1 -- cgit v1.2.3 From 5f6188a8003d080e3753b8f14f4a5a2325ae1ff6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Oct 2018 09:37:52 -0700 Subject: tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh In EDT design, I made the mistake of using tcp_wstamp_ns to store the last tcp_clock_ns() sample and to store the pacing virtual timer. This causes major regressions at high speed flows. Introduce tcp_clock_cache to store last tcp_clock_ns(). This is needed because some arches have slow high-resolution kernel time service. tcp_wstamp_ns is only updated when a packet is sent. Note that we can remove tcp_mstamp in the future since tcp_mstamp is essentially tcp_clock_cache/1000, so the apparent socket size increase is temporary. Fixes: 9799ccb0e984 ("tcp: add tcp_wstamp_ns socket field") Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 848f5b25e178..8ed77bb4ed86 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -249,6 +249,7 @@ struct tcp_sock { u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ /* RTT measurement */ u64 tcp_mstamp; /* most recent packet received/sent */ -- cgit v1.2.3 From 76a9ebe811fb3d0605cb084f1ae6be5610541865 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Oct 2018 09:37:53 -0700 Subject: net: extend sk_pacing_rate to unsigned long sk_pacing_rate has beed introduced as a u32 field in 2013, effectively limiting per flow pacing to 34Gbit. We believe it is time to allow TCP to pace high speed flows on 64bit hosts, as we now can reach 100Gbit on one TCP flow. This patch adds no cost for 32bit kernels. The tcpi_pacing_rate and tcpi_max_pacing_rate were already exported as 64bit, so iproute2/ss command require no changes. Unfortunately the SO_MAX_PACING_RATE socket option will stay 32bit and we will need to add a new option to let applications control high pacing rates. State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 1787144 10.246.9.76:49992 10.246.9.77:36741 timer:(on,003ms,0) ino:91863 sk:2 <-> skmem:(r0,rb540000,t66440,tb2363904,f605944,w1822984,o0,bl0,d0) ts sack bbr wscale:8,8 rto:201 rtt:0.057/0.006 mss:1448 rcvmss:536 advmss:1448 cwnd:138 ssthresh:178 bytes_acked:256699822585 segs_out:177279177 segs_in:3916318 data_segs_out:177279175 bbr:(bw:31276.8Mbps,mrtt:0,pacing_gain:1.25,cwnd_gain:2) send 28045.5Mbps lastrcv:73333 pacing_rate 38705.0Mbps delivery_rate 22997.6Mbps busy:73333ms unacked:135 retrans:0/157 rcv_space:14480 notsent:2085120 minrtt:0.013 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 751549ac0a84..cfaf261936c8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -422,8 +422,8 @@ struct sock { struct timer_list sk_timer; __u32 sk_priority; __u32 sk_mark; - u32 sk_pacing_rate; /* bytes per second */ - u32 sk_max_pacing_rate; + unsigned long sk_pacing_rate; /* bytes per second */ + unsigned long sk_max_pacing_rate; struct page_frag sk_frag; netdev_features_t sk_route_caps; netdev_features_t sk_route_nocaps; -- cgit v1.2.3 From 22e6c58b8c2843337ec4e8464b1ce6e869ca5bf4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:41 -0700 Subject: netlink: Add answer_flags to netlink_callback With dump filtering we need a way to ensure the NLM_F_DUMP_FILTERED flag is set on a message back to the user if the data returned is influenced by some input attributes. Normally this can be done as messages are added to the skb, but if the filter results in no data being returned, the user could be confused as to why. This patch adds answer_flags to the netlink_callback allowing dump handlers to set the NLM_F_DUMP_FILTERED at a minimum in the NLMSG_DONE message ensuring the flag gets back to the user. The netlink_callback space is initialized to 0 via a memset in __netlink_dump_start, so init of the new answer_flags is covered. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 72580f1a72a2..4da90a6ab536 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -180,6 +180,7 @@ struct netlink_callback { u16 family; u16 min_dump_alloc; bool strict_check; + u16 answer_flags; unsigned int prev_seq, seq; long args[6]; }; -- cgit v1.2.3 From 4724676d551c0961659b1da3fb4b5928169fb184 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:42 -0700 Subject: net: Add struct for fib dump filter Add struct fib_dump_filter for options on limiting which routes are returned in a dump request. The current list is table id, protocol, route type, rtm_flags and nexthop device index. struct net is needed to lookup the net_device from the index. Declare the filter for each route dump handler and plumb the new arguments from dump handlers to ip_valid_fib_dump_req. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + include/net/ip_fib.h | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index cef186dbd2ce..7ab119936e69 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,6 +174,7 @@ struct rt6_rtnl_dump_arg { struct sk_buff *skb; struct netlink_callback *cb; struct net *net; + struct fib_dump_filter filter; }; int rt6_dump_route(struct fib6_info *f6i, void *p_arg); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 852e4ebf2209..667013bf4266 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -222,6 +222,16 @@ struct fib_table { unsigned long __data[0]; }; +struct fib_dump_filter { + u32 table_id; + /* filter_set is an optimization that an entry is set */ + bool filter_set; + unsigned char protocol; + unsigned char rt_type; + unsigned int flags; + struct net_device *dev; +}; + int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, @@ -453,6 +463,7 @@ static inline void fib_proc_exit(struct net *net) u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); -int ip_valid_fib_dump_req(const struct nlmsghdr *nlh, +int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, + struct fib_dump_filter *filter, struct netlink_ext_ack *extack); #endif /* _NET_FIB_H */ -- cgit v1.2.3 From 18a8021a7be3207686851208f91a2f105b2d4703 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:43 -0700 Subject: net/ipv4: Plumb support for filtering route dumps Implement kernel side filtering of routes by table id, egress device index, protocol and route type. If the table id is given in the filter, lookup the table and call fib_table_dump directly for it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 667013bf4266..1eabc9edd2b9 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -239,7 +239,7 @@ int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, - struct netlink_callback *cb); + struct netlink_callback *cb, struct fib_dump_filter *filter); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); -- cgit v1.2.3 From e1cedae1ba6b09ae8376c1486712bf91ea0dfc41 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:46 -0700 Subject: ipmr: Refactor mr_rtm_dumproute Move per-table loops from mr_rtm_dumproute to mr_table_dump and export mr_table_dump for dumps by specific table id. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 6675b9f81979..db85373c8d15 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -283,6 +283,12 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg); int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mr_mfc *c, struct rtmsg *rtm); +int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, + struct netlink_callback *cb, + int (*fill)(struct mr_table *mrt, struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, struct mr_table *mrt), -- cgit v1.2.3 From cb167893f41e21e6bd283d78e53489289dc0592d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:47 -0700 Subject: net: Plumb support for filtering ipv4 and ipv6 multicast route dumps Implement kernel side filtering of routes by egress device index and table id. If the table id is given in the filter, lookup table and call mr_table_dump directly for it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index db85373c8d15..34de06b426ef 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -7,6 +7,7 @@ #include #include #include +#include /** * struct vif_device - interface representor for multicast routing @@ -288,7 +289,7 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, int (*fill)(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock); + spinlock_t *lock, struct fib_dump_filter *filter); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, struct mr_table *mrt), @@ -296,7 +297,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock); + spinlock_t *lock, struct fib_dump_filter *filter); int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, @@ -346,7 +347,7 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock) + spinlock_t *lock, struct fib_dump_filter *filter) { return -EINVAL; } -- cgit v1.2.3 From effe6792662495ad9c175bf0d9c53459a51fdbbd Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:48 -0700 Subject: net: Enable kernel side filtering of route dumps Update parsing of route dump request to enable kernel side filtering. Allow filtering results by protocol (e.g., which routing daemon installed the route), route type (e.g., unicast), table id and nexthop device. These amount to the low hanging fruit, yet a huge improvement, for dumping routes. ip_valid_fib_dump_req is called with RTNL held, so __dev_get_by_index can be used to look up the device index without taking a reference. From there filter->dev is only used during dump loops with the lock still held. Set NLM_F_DUMP_FILTERED in the answer_flags so the user knows the results have been filtered should no entries be returned. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1eabc9edd2b9..e8d9456bf36e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -465,5 +465,5 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, - struct netlink_ext_ack *extack); + struct netlink_callback *cb); #endif /* _NET_FIB_H */ -- cgit v1.2.3 From a218dc82f0b5c6c8ad3d58c9870ed69e26c08b3e Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 10 Oct 2018 09:57:13 +0200 Subject: netfilter: nft_osf: Add ttl option support Add ttl option support to the nftables "osf" expression. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_osf.h | 3 ++- include/uapi/linux/netfilter/nf_tables.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h index ecf7dab81e9e..c6000046c966 100644 --- a/include/linux/netfilter/nfnetlink_osf.h +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -27,6 +27,7 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, const struct list_head *nf_osf_fingers); const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers); + const struct list_head *nf_osf_fingers, + const int ttl_check); #endif /* _NFOSF_H */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5444e76870bb..579974b0bf0d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1511,9 +1511,16 @@ enum nft_flowtable_hook_attributes { }; #define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1) +/** + * enum nft_osf_attributes - nftables osf expression netlink attributes + * + * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8) + */ enum nft_osf_attributes { NFTA_OSF_UNSPEC, NFTA_OSF_DREG, + NFTA_OSF_TTL, __NFTA_OSF_MAX, }; #define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1) -- cgit v1.2.3 From c56a8be7e7aa855ebcccf0e9d9eba2216514d399 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Tue, 16 Oct 2018 03:59:20 -0700 Subject: qed: Add supported link and advertise link to display in ethtool. Added transceiver type, speed capability and board types in HSI, are utilizing to display the accurate link information in ethtool. Signed-off-by: Rahul Verma Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index dee3c9c744f7..a47321a0d572 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -667,15 +667,35 @@ enum qed_link_mode_bits { QED_LM_Autoneg_BIT = BIT(1), QED_LM_Asym_Pause_BIT = BIT(2), QED_LM_Pause_BIT = BIT(3), - QED_LM_1000baseT_Half_BIT = BIT(4), - QED_LM_1000baseT_Full_BIT = BIT(5), + QED_LM_1000baseT_Full_BIT = BIT(4), + QED_LM_10000baseT_Full_BIT = BIT(5), QED_LM_10000baseKR_Full_BIT = BIT(6), QED_LM_20000baseKR2_Full_BIT = BIT(7), QED_LM_25000baseKR_Full_BIT = BIT(8), QED_LM_40000baseLR4_Full_BIT = BIT(9), QED_LM_50000baseKR2_Full_BIT = BIT(10), QED_LM_100000baseKR4_Full_BIT = BIT(11), - QED_LM_COUNT = 11 + QED_LM_2500baseX_Full_BIT = BIT(12), + QED_LM_Backplane_BIT = BIT(13), + QED_LM_1000baseKX_Full_BIT = BIT(14), + QED_LM_10000baseKX4_Full_BIT = BIT(15), + QED_LM_10000baseR_FEC_BIT = BIT(16), + QED_LM_40000baseKR4_Full_BIT = BIT(17), + QED_LM_40000baseCR4_Full_BIT = BIT(18), + QED_LM_40000baseSR4_Full_BIT = BIT(19), + QED_LM_25000baseCR_Full_BIT = BIT(20), + QED_LM_25000baseSR_Full_BIT = BIT(21), + QED_LM_50000baseCR2_Full_BIT = BIT(22), + QED_LM_100000baseSR4_Full_BIT = BIT(23), + QED_LM_100000baseCR4_Full_BIT = BIT(24), + QED_LM_100000baseLR4_ER4_Full_BIT = BIT(25), + QED_LM_50000baseSR2_Full_BIT = BIT(26), + QED_LM_1000baseX_Full_BIT = BIT(27), + QED_LM_10000baseCR_Full_BIT = BIT(28), + QED_LM_10000baseSR_Full_BIT = BIT(29), + QED_LM_10000baseLR_Full_BIT = BIT(30), + QED_LM_10000baseLRM_Full_BIT = BIT(31), + QED_LM_COUNT = 32 }; struct qed_link_params { -- cgit v1.2.3 From 94a04d1d3d3681adde1a3e022b25dbac7b345b7e Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Tue, 9 Oct 2018 12:05:12 +0300 Subject: net/mlx5: Expose DC scatter to CQE capability bit dc_req_scat_data_cqe capability bit determines if requester scatter to cqe is available for 64 bytes CQE over DC transport type. Signed-off-by: Yonatan Cohen Reviewed-by: Guy Levi Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 68f4d5f9d929..0f460fb22c31 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1005,7 +1005,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 umr_modify_atomic_disabled[0x1]; u8 umr_indirect_mkey_disabled[0x1]; u8 umr_fence[0x2]; - u8 reserved_at_20c[0x3]; + u8 dc_req_scat_data_cqe[0x1]; + u8 reserved_at_20d[0x2]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; -- cgit v1.2.3 From 3f4c3127d332000530349db4843deece27fe5e0c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Oct 2018 10:36:01 -0700 Subject: bpf: sockmap, fix skmsg recvmsg handler to track size correctly When converting sockmap to new skmsg generic data structures we missed that the recvmsg handler did not correctly use sg.size and instead was using individual elements length. The result is if a sock is closed with outstanding data we omit the call to sk_mem_uncharge() and can get the warning below. [ 66.728282] WARNING: CPU: 6 PID: 5783 at net/core/stream.c:206 sk_stream_kill_queues+0x1fa/0x210 To fix this correct the redirect handler to xfer the size along with the scatterlist and also decrement the size from the recvmsg handler. Now when a sock is closed the remaining 'size' will be decremented with sk_mem_uncharge(). Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 0b919f0bc6d6..31df0d9fa536 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -176,6 +176,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, { dst->sg.data[which] = src->sg.data[which]; dst->sg.data[which].length = size; + dst->sg.size += size; src->sg.data[which].length -= size; src->sg.data[which].offset += size; } -- cgit v1.2.3 From 8734a162c13b1a893e7dff8de0df81fed04c51a6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Oct 2018 11:07:59 -0700 Subject: bpf: skmsg, improve sk_msg_used_element to work in cork context Currently sk_msg_used_element is only called in zerocopy context where cork is not possible and if this case happens we fallback to copy mode. However the helper is more useful if it works in all contexts. This patch resolved the case where if end == head indicating a full or empty ring the helper always reports an empty ring. To fix this add a test for the full ring case to avoid reporting a full ring has 0 elements. This additional functionality will be used in the next patches from recvmsg context where end = head with a full ring is a valid case. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 31df0d9fa536..22347b08e1f8 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -187,18 +187,21 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) sk_msg_init(src); } +static inline bool sk_msg_full(const struct sk_msg *msg) +{ + return (msg->sg.end == msg->sg.start) && msg->sg.size; +} + static inline u32 sk_msg_elem_used(const struct sk_msg *msg) { + if (sk_msg_full(msg)) + return MAX_MSG_FRAGS; + return msg->sg.end >= msg->sg.start ? msg->sg.end - msg->sg.start : msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); } -static inline bool sk_msg_full(const struct sk_msg *msg) -{ - return (msg->sg.end == msg->sg.start) && msg->sg.size; -} - static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) { return &msg->sg.data[which]; -- cgit v1.2.3 From 02c558b2d5d679fbbcaa5b9689484c7e0f8abb7b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Oct 2018 11:08:04 -0700 Subject: bpf: sockmap, support for msg_peek in sk_msg with redirect ingress This adds support for the MSG_PEEK flag when doing redirect to ingress and receiving on the sk_msg psock queue. Previously the flag was being ignored which could confuse applications if they expected the flag to work as normal. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3600ae0f25c3..14fdd7ce9992 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2089,7 +2089,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, - struct msghdr *msg, int len); + struct msghdr *msg, int len, int flags); /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF -- cgit v1.2.3 From b8aee82250b7d90a32b11ba208656f52dbaca342 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 2 Oct 2018 22:57:24 +0000 Subject: net/mlx5: E-Switch, Get counters for offloaded flows from callers There's no real reason for the e-switch logic to manage the creation of counters for offloaded flows. The API already has the directive for the caller to denote they want to attach a counter to the created flow. As such, we go and move the management of flow counters to the mlx5e tc offload logic. This also lets us remove an inelegant interface where the FS layer had to provide a way to retrieve a counter from a flow rule. Signed-off-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index b1c026f1c8ba..74d0ea146c9a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -186,7 +186,6 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_flow_destination *new_dest, struct mlx5_flow_destination *old_dest); -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, -- cgit v1.2.3 From 171c7625bef999848ee6032c6dde96e7330c4d15 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 3 Oct 2018 00:03:35 +0000 Subject: net/mlx5: Use flow counter IDs and not the wrapping cache object Currently, when a flow rule is created using the FS core layer, the caller has to pass the entire flow counter object and not just the counter HW handle (ID). This requires both the FS core and the caller to have knowledge about the inner implementation of the FS layer flow counters cache and limits the possible users. Move to use the counter ID across the place when dealing with flows. Doing this decoupling, now can we privatize the inner implementation of the flow counters. Signed-off-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 74d0ea146c9a..a5fc62184195 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -92,7 +92,7 @@ struct mlx5_flow_destination { u32 tir_num; u32 ft_num; struct mlx5_flow_table *ft; - struct mlx5_fc *counter; + u32 counter_id; struct { u16 num; u16 vhca_id; @@ -192,6 +192,7 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, u64 *packets, u64 *bytes); +u32 mlx5_fc_id(struct mlx5_fc *counter); int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); -- cgit v1.2.3 From b9aa0ba17af5afa13605eb6ea91f1974da97a2e2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 31 May 2018 11:50:23 +0300 Subject: net/mlx5: Add cap bits for multi fdb encap If set, the firmware supports creating of flow tables with encap enabled while VFs are configured, if we already created one (restriction still applies on the first creation). Signed-off-by: Paul Blakey Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 15e36198f85f..963611820006 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -584,7 +584,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_0[0x1c]; u8 fdb_multi_path_to_table[0x1]; - u8 reserved_at_1d[0x1e3]; + u8 reserved_at_1d[0x1]; + u8 multi_fdb_encap[0x1]; + u8 reserved_at_1e[0x1e1]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; -- cgit v1.2.3 From 328edb499f99126946845ece477c9c1afe8631af Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 3 Jul 2018 11:13:00 +0300 Subject: net/mlx5: Split FDB fast path prio to multiple namespaces Towards supporting multi-chains and priorities, split the FDB fast path to multiple namespaces (sub namespaces), each with multiple priorities. This patch adds a new flow steering type, FS_TYPE_PRIO_CHAINS, which is like current FS_TYPE_PRIO, but may contain only namespaces, and those will be in parallel to one another in terms of managing of the flow tables connections inside them. Meaning, while searching for the next or previous flow table to connect for a new table inside such namespace we skip the parallel namespaces in the same level under the FS_TYPE_PRIO_CHAINS prio we originated from. We use this new type for splitting the fast path prio into multiple parallel namespaces, each containing normal prios. The prios inside them (and their tables) will be connected to one another, but not from one parallel namespace to another, instead the last prio in each namespace will be connected to the next prio in the containing FDB namespace, which is the slow path prio. Signed-off-by: Paul Blakey Acked-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index a5fc62184195..f8d00872c7d3 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -101,6 +101,8 @@ struct mlx5_flow_destination { }; }; +struct mlx5_flow_namespace * +mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n); struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); -- cgit v1.2.3 From d5634fee245f9e92787e3a34ef621fc12b2cbf16 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 20 Sep 2018 12:17:48 +0200 Subject: net/mlx5: Add a no-append flow insertion mode If no-append flag is set, we will add a new FTE, instead of appending the actions of the inserted rule when the same match already exists. While here, move the has_flow_tag boolean indicator to be a flag too. This patch doesn't change any functionality. Signed-off-by: Paul Blakey Reviewed-by: Or Gerlitz Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index f8d00872c7d3..5660f07d3be0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -158,20 +158,28 @@ struct mlx5_fs_vlan { #define MLX5_FS_VLAN_DEPTH 2 +enum { + FLOW_ACT_HAS_TAG = BIT(0), + FLOW_ACT_NO_APPEND = BIT(1), +}; + struct mlx5_flow_act { u32 action; - bool has_flow_tag; u32 flow_tag; u32 reformat_id; u32 modify_id; uintptr_t esp_id; + u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; }; #define MLX5_DECLARE_FLOW_ACT(name) \ - struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ - MLX5_FS_DEFAULT_FLOW_TAG, 0, 0} + struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \ + .reformat_id = 0, \ + .modify_id = 0, \ + .flags = 0, } /* Single destination per rule. * Group ID is implied by the match criteria. -- cgit v1.2.3 From cca45e054ce55c06046a37bf4d3fd7c17edd57da Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Oct 2018 08:53:10 +0000 Subject: vxlan: Export address checking functions Drivers that support VxLAN offload need to be able to sanitize the configuration of the VxLAN device and accept / reject its offload. For example, mlxsw requires that the local IP of the VxLAN device be set and that packets be flooded to unicast IP(s) and not to a multicast group. Expose the functions that perform such checks. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- include/net/vxlan.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7ef15179f263..dd3d72ce64b6 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -370,4 +370,36 @@ static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) return vs->sock->sk->sk_family; } +#if IS_ENABLED(CONFIG_IPV6) + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_any(&ipa->sin6.sin6_addr); + else + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); + else + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +#else /* !IS_ENABLED(CONFIG_IPV6) */ + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +#endif /* IS_ENABLED(CONFIG_IPV6) */ + #endif -- cgit v1.2.3 From 28e450333d4d1328710e258d38793c61658d4c95 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Oct 2018 08:53:12 +0000 Subject: inet: Refactor INET_ECN_decapsulate() Drivers that support tunnel decapsulation (IPinIP or NVE) need to configure the underlying device to conform to the behavior outlined in RFC 6040 with respect to the ECN bits. This behavior is implemented by INET_ECN_decapsulate() which requires an skb to be passed where the ECN CE bit can be potentially set. Since these drivers do not need to mark an skb, but only configure the device to do so, factor out the business logic to __INET_ECN_decapsulate() and potentially perform the marking in INET_ECN_decapsulate(). This allows drivers to invoke __INET_ECN_decapsulate() and configure the device. Signed-off-by: Ido Schimmel Suggested-by: Petr Machata Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 482a1b705362..c8e2bebd8d93 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -183,8 +183,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) * 1 if something is broken and should be logged (!!! above) * 2 if packet should be dropped */ -static inline int INET_ECN_decapsulate(struct sk_buff *skb, - __u8 outer, __u8 inner) +static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { @@ -198,10 +197,21 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb, } } - if (INET_ECN_is_ce(outer)) + *set_ce = INET_ECN_is_ce(outer); + return 0; +} + +static inline int INET_ECN_decapsulate(struct sk_buff *skb, + __u8 outer, __u8 inner) +{ + bool set_ce = false; + int rc; + + rc = __INET_ECN_decapsulate(outer, inner, &set_ce); + if (!rc && set_ce) INET_ECN_set_ce(skb); - return 0; + return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, -- cgit v1.2.3 From 5ff4ff4fe8c4e7d0de1d837e489056f0c470667b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Oct 2018 08:53:20 +0000 Subject: net: Add netif_is_vxlan() Add the ability to determine whether a netdev is a VxLAN netdev by calling the above mentioned function that checks the netdev's rtnl_link_ops. This will allow modules to identify netdev events involving a VxLAN netdev and act accordingly. For example, drivers capable of VxLAN offload will need to configure the underlying device when a VxLAN netdev is being enslaved to an offloaded bridge. Convert nfp to use the newly introduced helper. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/vxlan.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index dd3d72ce64b6..95227fa925e8 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -5,6 +5,7 @@ #include #include #include +#include /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -402,4 +403,10 @@ static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) #endif /* IS_ENABLED(CONFIG_IPV6) */ +static inline bool netif_is_vxlan(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "vxlan"); +} + #endif -- cgit v1.2.3 From 9a99735317866e821c75f957fc85c63d049d330c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 17 Oct 2018 08:53:22 +0000 Subject: vxlan: Add switchdev notifications When offloading VXLAN devices, drivers need to know about events in VXLAN FDB database. Since VXLAN models a bridge, it is natural to distribute the VXLAN FDB notifications using the pre-existing switchdev notification mechanism. To that end, introduce two new notification types: SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE and SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE. Introduce a new function, vxlan_fdb_switchdev_call_notifiers() to send the new notifier types, and a struct switchdev_notifier_vxlan_fdb_info to communicate the details of the FDB entry under consideration. Invoke the new function from vxlan_fdb_notify(). Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/switchdev.h | 3 +++ include/net/vxlan.h | 11 +++++++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d574ce63bf22..47199a11c586 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -145,6 +145,9 @@ enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_DEVICE, SWITCHDEV_FDB_DEL_TO_DEVICE, SWITCHDEV_FDB_OFFLOADED, + + SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, + SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, }; struct switchdev_notifier_info { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 95227fa925e8..3f00877f5edf 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -6,6 +6,7 @@ #include #include #include +#include /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -409,4 +410,14 @@ static inline bool netif_is_vxlan(const struct net_device *dev) !strcmp(dev->rtnl_link_ops->kind, "vxlan"); } +struct switchdev_notifier_vxlan_fdb_info { + struct switchdev_notifier_info info; /* must be first */ + union vxlan_addr remote_ip; + __be16 remote_port; + __be32 remote_vni; + u32 remote_ifindex; + u8 eth_addr[ETH_ALEN]; + __be32 vni; +}; + #endif -- cgit v1.2.3 From 1941f1d6453a527ae8df59891da0319646608444 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 17 Oct 2018 08:53:24 +0000 Subject: vxlan: Add vxlan_fdb_find_uc() for FDB querying A switchdev-capable driver that is aware of VXLAN may need to query VXLAN FDB. In the particular case of mlxsw, this functionality is limited to querying UC FDBs. Those being easier to deal with than the general case of RDST chain traversal, introduce an interface to query specifically UC FDBs: vxlan_fdb_find_uc(). Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/vxlan.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 3f00877f5edf..1828d686ac4f 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -420,4 +420,16 @@ struct switchdev_notifier_vxlan_fdb_info { __be32 vni; }; +#if IS_ENABLED(CONFIG_VXLAN) +int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info); +#else +static inline int +vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info) +{ + return -ENOENT; +} +#endif + #endif -- cgit v1.2.3 From 0efe11733356273d734cc2c5ab2dc6f5865cbeb6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 17 Oct 2018 08:53:26 +0000 Subject: vxlan: Support marking RDSTs as offloaded Offloaded bridge FDB entries are marked with NTF_OFFLOADED. Implement a similar mechanism for VXLAN, where a given remote destination can be marked as offloaded. To that end, introduce a new event, SWITCHDEV_VXLAN_FDB_OFFLOADED, through which the marking is communicated to the vxlan driver. To identify which RDST should be marked as offloaded, an switchdev_notifier_vxlan_fdb_info is passed to the listeners. The "offloaded" flag in that object determines whether the offloaded mark should be set or cleared. When sending offloaded FDB entries over netlink, mark them with NTF_OFFLOADED. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/switchdev.h | 1 + include/net/vxlan.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 47199a11c586..b040f82351ba 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -148,6 +148,7 @@ enum switchdev_notifier_type { SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, + SWITCHDEV_VXLAN_FDB_OFFLOADED, }; struct switchdev_notifier_info { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 1828d686ac4f..03431c148e16 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -192,6 +192,7 @@ union vxlan_addr { struct vxlan_rdst { union vxlan_addr remote_ip; __be16 remote_port; + u8 offloaded:1; __be32 remote_vni; u32 remote_ifindex; struct list_head list; @@ -418,6 +419,7 @@ struct switchdev_notifier_vxlan_fdb_info { u32 remote_ifindex; u8 eth_addr[ETH_ALEN]; __be32 vni; + bool offloaded; }; #if IS_ENABLED(CONFIG_VXLAN) -- cgit v1.2.3 From e9ba0fbc7dd23a74e77960c98c988f59a1ff75aa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 17 Oct 2018 08:53:29 +0000 Subject: bridge: switchdev: Allow clearing FDB entry offload indication Currently, an FDB entry only ceases being offloaded when it is deleted. This changes with VxLAN encapsulation. Devices capable of performing VxLAN encapsulation usually have only one FDB table, unlike the software data path which has two - one in the bridge driver and another in the VxLAN driver. Therefore, bridge FDB entries pointing to a VxLAN device are only offloaded if there is a corresponding entry in the VxLAN FDB. Allow clearing the offload indication in case the corresponding entry was deleted from the VxLAN FDB. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- include/net/switchdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index b040f82351ba..881ecb1555bf 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -159,7 +159,8 @@ struct switchdev_notifier_fdb_info { struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; - bool added_by_user; + u8 added_by_user:1, + offloaded:1; }; static inline struct net_device * -- cgit v1.2.3 From b55cbc8d9b44aaee94f19e995a5f241d453763ee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 17 Oct 2018 16:24:48 +0200 Subject: bpf: fix doc of bpf_skb_adjust_room() in uapi len_diff is signed. Fixes: fa15601ab31e ("bpf: add documentation for eBPF helpers (33-41)") CC: Quentin Monnet Signed-off-by: Nicolas Dichtel Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f9187b41dff6..5e46f6732781 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1433,7 +1433,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. -- cgit v1.2.3 From 82385b0d2d2504aee51aa3fb40ebfb03603f64c3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Oct 2018 15:01:37 +0200 Subject: net: skbuff.h: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Acked-by: Kees Cook Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 119d092c6b13..0ba687454267 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3505,13 +3505,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) case 32: diffs |= __it_diff(a, b, 64); + /* fall through */ case 24: diffs |= __it_diff(a, b, 64); + /* fall through */ case 16: diffs |= __it_diff(a, b, 64); + /* fall through */ case 8: diffs |= __it_diff(a, b, 64); break; case 28: diffs |= __it_diff(a, b, 64); + /* fall through */ case 20: diffs |= __it_diff(a, b, 64); + /* fall through */ case 12: diffs |= __it_diff(a, b, 64); + /* fall through */ case 4: diffs |= __it_diff(a, b, 32); break; } -- cgit v1.2.3 From 605c0ac182c34867bda71bfbcc74958aabbe2fe0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 17 Oct 2018 03:07:50 +0800 Subject: sctp: count both sk and asoc sndbuf with skb truesize and sctp_chunk size Now it's confusing that asoc sndbuf_used is doing memory accounting with SCTP_DATA_SNDSIZE(chunk) + sizeof(sk_buff) + sizeof(sctp_chunk) while sk sk_wmem_alloc is doing that with skb->truesize + sizeof(sctp_chunk). It also causes sctp_prsctp_prune to count with a wrong freed memory when sndbuf_policy is not set. To make this right and also keep consistent between asoc sndbuf_used, sk sk_wmem_alloc and sk_wmem_queued, use skb->truesize + sizeof(sctp_chunk) for them. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 86f034b524d4..8dadc74c22e7 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -148,11 +148,6 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \ a->chunk_hdr->type == SCTP_CID_I_DATA) -/* Calculate the actual data size in a data chunk */ -#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \ - (unsigned long)(c->chunk_hdr) - \ - sctp_datachk_len(&c->asoc->stream))) - /* Internal error codes */ enum sctp_ierror { SCTP_IERROR_NO_ERROR = 0, -- cgit v1.2.3 From 4972e6fa3a04032830bc3d6bb343d08ab3546773 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 12 Sep 2018 15:36:41 +0300 Subject: net/mlx5: Refactor fragmented buffer struct fields and init flow Take struct mlx5_frag_buf out of mlx5_frag_buf_ctrl, as it is not needed to manage and control the datapath of the fragmented buffers API. struct mlx5_frag_buf contains control info to manage the allocation and de-allocation of the fragmented buffer. Its fields are not relevant for datapath, so here I take them out of the struct mlx5_frag_buf_ctrl, except for the fragments array itself. In addition, modified mlx5_fill_fbc to initialise the frags pointers as well. This implies that the buffer must be allocated before the function is called. A set of type-specific *_get_byte_size() functions are replaced by a generic one. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 94ffd02af7cd..e10f61a1f77d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -357,7 +357,7 @@ struct mlx5_frag_buf { }; struct mlx5_frag_buf_ctrl { - struct mlx5_frag_buf frag_buf; + struct mlx5_buf_list *frags; u32 sz_m1; u16 frag_sz_m1; u16 strides_offset; @@ -994,10 +994,12 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } -static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, +static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags, + u8 log_stride, u8 log_sz, u16 strides_offset, struct mlx5_frag_buf_ctrl *fbc) { + fbc->frags = frags; fbc->log_stride = log_stride; fbc->log_sz = log_sz; fbc->sz_m1 = (1 << fbc->log_sz) - 1; @@ -1006,18 +1008,11 @@ static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, fbc->strides_offset = strides_offset; } -static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz, +static inline void mlx5_init_fbc(struct mlx5_buf_list *frags, + u8 log_stride, u8 log_sz, struct mlx5_frag_buf_ctrl *fbc) { - mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc); -} - -static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc, - void *cqc) -{ - mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz), - MLX5_GET(cqc, cqc, log_cq_size), - fbc); + mlx5_init_fbc_offset(frags, log_stride, log_sz, 0, fbc); } static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, @@ -1028,8 +1023,7 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, ix += fbc->strides_offset; frag = ix >> fbc->log_frag_strides; - return fbc->frag_buf.frags[frag].buf + - ((fbc->frag_sz_m1 & ix) << fbc->log_stride); + return fbc->frags[frag].buf + ((fbc->frag_sz_m1 & ix) << fbc->log_stride); } int mlx5_cmd_init(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 4b5b9c7d972e8a7b1e7691c7c921ec0d6dec33b9 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Tue, 9 Oct 2018 14:16:43 +0300 Subject: net/mlx5: Add FEC fields to Port Phy Link Mode (PPLM) reg Added FEC related fields to PPLM layout. These fields are needed to set and query FEC policy for different link speeds. Signed-off-by: Shay Agroskin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e10f61a1f77d..696ed3f7f894 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -133,6 +133,7 @@ enum { MLX5_REG_PVLC = 0x500f, MLX5_REG_PCMR = 0x5041, MLX5_REG_PMLP = 0x5002, + MLX5_REG_PPLM = 0x5023, MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 963611820006..47b09a742ae5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7828,20 +7828,34 @@ struct mlx5_ifc_pplr_reg_bits { struct mlx5_ifc_pplm_reg_bits { u8 reserved_at_0[0x8]; - u8 local_port[0x8]; - u8 reserved_at_10[0x10]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0x20]; - u8 port_profile_mode[0x8]; - u8 static_port_profile[0x8]; - u8 active_port_profile[0x8]; - u8 reserved_at_58[0x8]; + u8 port_profile_mode[0x8]; + u8 static_port_profile[0x8]; + u8 active_port_profile[0x8]; + u8 reserved_at_58[0x8]; - u8 retransmission_active[0x8]; - u8 fec_mode_active[0x18]; + u8 retransmission_active[0x8]; + u8 fec_mode_active[0x18]; - u8 reserved_at_80[0x20]; + u8 rs_fec_correction_bypass_cap[0x4]; + u8 reserved_at_84[0x8]; + u8 fec_override_cap_56g[0x4]; + u8 fec_override_cap_100g[0x4]; + u8 fec_override_cap_50g[0x4]; + u8 fec_override_cap_25g[0x4]; + u8 fec_override_cap_10g_40g[0x4]; + + u8 rs_fec_correction_bypass_admin[0x4]; + u8 reserved_at_a4[0x8]; + u8 fec_override_admin_56g[0x4]; + u8 fec_override_admin_100g[0x4]; + u8 fec_override_admin_50g[0x4]; + u8 fec_override_admin_25g[0x4]; + u8 fec_override_admin_10g_40g[0x4]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -8137,7 +8151,10 @@ struct mlx5_ifc_pcam_enhanced_features_bits { struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; - u8 port_access_reg_cap_mask_63_to_32[0x20]; + + u8 port_access_reg_cap_mask_63_to_36[0x1c]; + u8 pplm[0x1]; + u8 port_access_reg_cap_mask_34_to_32[0x3]; u8 port_access_reg_cap_mask_31_to_13[0x13]; u8 pbmc[0x1]; -- cgit v1.2.3 From 67daf1186086ad4b2ec09b8078b835936977d06a Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Sun, 30 Sep 2018 09:58:08 +0300 Subject: net/mlx5: Added "per_lane_error_counters" cap bit to PCAM Added "Per lane raw errors" capability bit in Ports Capabilities Mask (PCAM) enhanced features layout. This bit determines if the fields "phy_raw_errors_laneX" in "Physical Layer statistical" counters group are supported. Signed-off-by: Shay Agroskin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 47b09a742ae5..dbff9ff28f2c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8140,7 +8140,8 @@ struct mlx5_ifc_pcam_enhanced_features_bits { u8 rx_icrc_encapsulated_counter[0x1]; u8 reserved_at_6e[0x8]; u8 pfcc_mask[0x1]; - u8 reserved_at_77[0x4]; + u8 reserved_at_77[0x3]; + u8 per_lane_error_counters[0x1]; u8 rx_buffer_fullness_counters[0x1]; u8 ptys_connector_type[0x1]; u8 reserved_at_7d[0x1]; -- cgit v1.2.3 From 5f1be84aad4b520a36246d0c289ad73641277630 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 12 Oct 2018 03:01:54 +0900 Subject: netfilter: nf_flow_table: remove unnecessary parameter of nf_flow_table_cleanup() parameter net of nf_flow_table_cleanup() is not used. So that it can be removed. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 0e355f4a3d76..77e2761d4f2f 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -99,7 +99,7 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table, void (*iter)(struct flow_offload *flow, void *data), void *data); -void nf_flow_table_cleanup(struct net *net, struct net_device *dev); +void nf_flow_table_cleanup(struct net_device *dev); int nf_flow_table_init(struct nf_flowtable *flow_table); void nf_flow_table_free(struct nf_flowtable *flow_table); -- cgit v1.2.3 From 468c041cff57e87f18e1022cacf9f5c98bf00b58 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 18 Oct 2018 22:29:59 +0900 Subject: netfilter: nfnetlink_log: remove empty nfnetlink_log.h header file /include/net/netfilter/nfnetlink_log.h file is empty. so that it can be removed. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nfnetlink_log.h | 1 - 1 file changed, 1 deletion(-) delete mode 100644 include/net/netfilter/nfnetlink_log.h (limited to 'include') diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/include/net/netfilter/nfnetlink_log.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -- cgit v1.2.3 From af510ebd8913bee016492832f532ed919b51c09c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 19 Oct 2018 11:48:24 +0200 Subject: Revert "netfilter: xt_quota: fix the behavior of xt_quota module" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e9837e55b0200da544a095a1fca36efd7fd3ba30. When talking to Maze and Chenbo, we agreed to keep this back by now due to problems in the ruleset listing path with 32-bit arches. Signed-off-by: Maciej Żenczykowski Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_quota.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_quota.h b/include/uapi/linux/netfilter/xt_quota.h index d72fd52adbba..f3ba5d9e58b6 100644 --- a/include/uapi/linux/netfilter/xt_quota.h +++ b/include/uapi/linux/netfilter/xt_quota.h @@ -15,11 +15,9 @@ struct xt_quota_info { __u32 flags; __u32 pad; __aligned_u64 quota; -#ifdef __KERNEL__ - atomic64_t counter; -#else - __aligned_u64 remain; -#endif + + /* Used internally by the kernel */ + struct xt_quota_priv *master; }; #endif /* _XT_QUOTA_H */ -- cgit v1.2.3 From 144991602e6a14d667b295f1b099e609ce857772 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:09 +0200 Subject: bpf: rename stack trace map operations In the following patches queue and stack maps (FIFO and LIFO datastructures) will be implemented. In order to avoid confusion and a possible name clash rename stack_map_ops to stack_trace_map_ops Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fa48343a5ea1..7bad4e1947ed 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) #ifdef CONFIG_PERF_EVENTS -BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) -- cgit v1.2.3 From 2ea864c58f19bf70a0e2415f9f1c53814e07f1b4 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:20 +0200 Subject: bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE ARG_PTR_TO_UNINIT_MAP_VALUE argument is a pointer to a memory zone used to save the value of a map. Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not be passed as an extra argument. This will be used in the following patch that implements some new helpers that receive a pointer to be filled with a map value. Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e60fff48288b..0f8b863e0229 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -138,6 +138,7 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack -- cgit v1.2.3 From f1a2e44a3aeccb3ff18d3ccc0b0203e70b95bd92 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:25 +0200 Subject: bpf: add queue and stack maps Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs. These maps support peek, pop and push operations that are exposed to eBPF programs through the new bpf_map[peek/pop/push] helpers. Those operations are exposed to userspace applications through the already existing syscalls in the following way: BPF_MAP_LOOKUP_ELEM -> peek BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop BPF_MAP_UPDATE_ELEM -> push Queue/stack maps are implemented using a buffer, tail and head indexes, hence BPF_F_NO_PREALLOC is not supported. As opposite to other maps, queue and stack do not use RCU for protecting maps values, the bpf_map[peek/pop] have a ARG_PTR_TO_UNINIT_MAP_VALUE argument that is a pointer to a memory zone where to save the value of a map. Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not be passed as an extra argument. Our main motivation for implementing queue/stack maps was to keep track of a pool of elements, like network ports in a SNAT, however we forsee other use cases, like for exampling saving last N kernel events in a map and then analysing from userspace. Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ include/linux/bpf_types.h | 2 ++ include/uapi/linux/bpf.h | 29 ++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0f8b863e0229..33014ae73103 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,6 +39,9 @@ struct bpf_map_ops { void *(*map_lookup_elem)(struct bpf_map *map, void *key); int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + int (*map_pop_elem)(struct bpf_map *map, void *value); + int (*map_peek_elem)(struct bpf_map *map, void *value); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -811,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_map_push_elem_proto; +extern const struct bpf_func_proto bpf_map_pop_elem_proto; +extern const struct bpf_func_proto bpf_map_peek_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 7bad4e1947ed..44d9ab4809bd 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) #endif #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5e46f6732781..70082cb626b4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -128,6 +128,8 @@ enum bpf_map_type { BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, }; enum bpf_prog_type { @@ -462,6 +464,28 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -2303,7 +2327,10 @@ union bpf_attr { FN(skb_ancestor_cgroup_id), \ FN(sk_lookup_tcp), \ FN(sk_lookup_udp), \ - FN(sk_release), + FN(sk_release), \ + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From bd513cd08f10cbe28856f99ae951e86e86803861 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:30 +0200 Subject: bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall The previous patch implemented a bpf queue/stack maps that provided the peek/pop/push functions. There is not a direct relationship between those functions and the current maps syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added, this is mapped to the pop operation in the queue/stack maps and it is still to implement in other kind of maps. Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 70082cb626b4..a2fb333290dc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -103,6 +103,7 @@ enum bpf_cmd { BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, }; enum bpf_map_type { -- cgit v1.2.3 From b39b5f411dcfce28ff954e5d6acb2c11be3cb0ec Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 19 Oct 2018 09:57:57 -0700 Subject: bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the skb. This patch enables direct access of skb for these programs. Two helper functions bpf_compute_and_save_data_end() and bpf_restore_data_end() are introduced. There are used in __cgroup_bpf_run_filter_skb(), to compute proper data_end for the BPF program, and restore original data afterwards. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 5771874bc01e..91b4c934f02e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) cb->data_end = skb->data + skb_headlen(skb); } +/* Similar to bpf_compute_data_pointers(), except that save orginal + * data in cb->data and cb->meta_data for restore. + */ +static inline void bpf_compute_and_save_data_end( + struct sk_buff *skb, void **saved_data_end) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + + *saved_data_end = cb->data_end; + cb->data_end = skb->data + skb_headlen(skb); +} + +/* Restore data saved by bpf_compute_data_pointers(). */ +static inline void bpf_restore_data_end( + struct sk_buff *skb, void *saved_data_end) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + + cb->data_end = saved_data_end; +} + static inline u8 *bpf_skb_cb(struct sk_buff *skb) { /* eBPF programs may read/write skb->cb[] area to transfer meta -- cgit v1.2.3 From 5032d079909d1ac5c2535acc32d5f01cd245d8ea Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 18 Oct 2018 13:58:35 -0700 Subject: bpf: skmsg, fix psock create on existing kcm/tls port Before using the psock returned by sk_psock_get() when adding it to a sockmap we need to ensure it is actually a sockmap based psock. Previously we were only checking this after incrementing the reference counter which was an error. This resulted in a slab-out-of-bounds error when the psock was not actually a sockmap type. This moves the check up so the reference counter is only used if it is a sockmap psock. Eric reported the following KASAN BUG, BUG: KASAN: slab-out-of-bounds in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] BUG: KASAN: slab-out-of-bounds in refcount_inc_not_zero_checked+0x97/0x2f0 lib/refcount.c:120 Read of size 4 at addr ffff88019548be58 by task syz-executor4/22387 CPU: 1 PID: 22387 Comm: syz-executor4 Not tainted 4.19.0-rc7+ #264 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] refcount_inc_not_zero_checked+0x97/0x2f0 lib/refcount.c:120 sk_psock_get include/linux/skmsg.h:379 [inline] sock_map_link.isra.6+0x41f/0xe30 net/core/sock_map.c:178 sock_hash_update_common+0x19b/0x11e0 net/core/sock_map.c:669 sock_hash_update_elem+0x306/0x470 net/core/sock_map.c:738 map_update_elem+0x819/0xdf0 kernel/bpf/syscall.c:818 Signed-off-by: John Fastabend Reported-by: Eric Dumazet Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 22347b08e1f8..84e18863f6a4 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -270,11 +270,6 @@ static inline struct sk_psock *sk_psock(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } -static inline bool sk_has_psock(struct sock *sk) -{ - return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg; -} - static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { @@ -374,6 +369,26 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, return test_bit(bit, &psock->state); } +static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) +{ + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (psock) { + if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { + psock = ERR_PTR(-EBUSY); + goto out; + } + + if (!refcount_inc_not_zero(&psock->refcnt)) + psock = ERR_PTR(-EBUSY); + } +out: + rcu_read_unlock(); + return psock; +} + static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; -- cgit v1.2.3 From c9fbd71f73094311b31ee703a918e9e0df502cef Mon Sep 17 00:00:00 2001 From: Debabrata Banerjee Date: Thu, 18 Oct 2018 11:18:26 -0400 Subject: netpoll: allow cleanup to be synchronous This fixes a problem introduced by: commit 2cde6acd49da ("netpoll: Fix __netpoll_rcu_free so that it can hold the rtnl lock") When using netconsole on a bond, __netpoll_cleanup can asynchronously recurse multiple times, each __netpoll_free_async call can result in more __netpoll_free_async's. This means there is now a race between cleanup_work queues on multiple netpoll_info's on multiple devices and the configuration of a new netpoll. For example if a netconsole is set to enable 0, reconfigured, and enable 1 immediately, this netconsole will likely not work. Given the reason for __netpoll_free_async is it can be called when rtnl is not locked, if it is locked, we should be able to execute synchronously. It appears to be locked everywhere it's called from. Generalize the design pattern from the teaming driver for current callers of __netpoll_free_async. CC: Neil Horman CC: "David S. Miller" Signed-off-by: Debabrata Banerjee Signed-off-by: David S. Miller --- include/linux/netpoll.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 3ef82d3a78db..676f1ff161a9 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -31,8 +31,6 @@ struct netpoll { bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; - - struct work_struct cleanup_work; }; struct netpoll_info { @@ -63,7 +61,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); -void __netpoll_free_async(struct netpoll *np); +void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, struct net_device *dev); -- cgit v1.2.3 From bff5b4b3737219195ca0caef4ff7884303cb5dc1 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Thu, 18 Oct 2018 15:06:01 -0400 Subject: net: phy: micrel: add Microchip KSZ9131 initial driver Add support for Microchip Technology KSZ9131 10/100/1000 Ethernet PHY Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 472fa4d4ea62..7361cd3fddc1 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -31,6 +31,7 @@ #define PHY_ID_KSZ8081 0x00221560 #define PHY_ID_KSZ8061 0x00221570 #define PHY_ID_KSZ9031 0x00221620 +#define PHY_ID_KSZ9131 0x00221640 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 -- cgit v1.2.3 From 6fff607e2f14bd7c63c06c464a6f93b8efbabe28 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Oct 2018 19:56:49 -0700 Subject: bpf: sk_msg program helper bpf_msg_push_data This allows user to push data into a msg using sk_msg program types. The format is as follows, bpf_msg_push_data(msg, offset, len, flags) this will insert 'len' bytes at offset 'offset'. For example to prepend 10 bytes at the front of the message the user can, bpf_msg_push_data(msg, 0, 10, 0); This will invalidate data bounds so BPF user will have to then recheck data bounds after calling this. After this the msg size will have been updated and the user is free to write into the added bytes. We allow any offset/len as long as it is within the (data, data_end) range. However, a copy will be required if the ring is full and its possible for the helper to fail with ENOMEM or EINVAL errors which need to be handled by the BPF program. This can be used similar to XDP metadata to pass data between sk_msg layer and lower layers. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 5 +++++ include/uapi/linux/bpf.h | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 84e18863f6a4..2a11e9d91dfa 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -207,6 +207,11 @@ static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) return &msg->sg.data[which]; } +static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which) +{ + return msg->sg.data[which]; +} + static inline struct page *sk_msg_page(struct sk_msg *msg, int which) { return sg_page(sk_msg_elem(msg, which)); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a2fb333290dc..852dc17ab47a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2240,6 +2240,23 @@ union bpf_attr { * pointer that was returned from bpf_sk_lookup_xxx\ (). * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * Description + * For socket policies, insert *len* bytes into msg at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it may want to insert metadata or options into the msg. + * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. + * + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2331,7 +2348,8 @@ union bpf_attr { FN(sk_release), \ FN(map_push_elem), \ FN(map_pop_elem), \ - FN(map_peek_elem), + FN(map_peek_elem), \ + FN(msg_push_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From c16ee04c9b305d57719344922c4d48379e206a79 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 21 Oct 2018 02:09:23 +0200 Subject: ulp: remove uid and user_visible members They are not used anymore and therefore should be removed. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 14fdd7ce9992..8a61c3e8c15d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2051,11 +2051,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); #define TCP_ULP_MAX 128 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) -enum { - TCP_ULP_TLS, - TCP_ULP_BPF, -}; - struct tcp_ulp_ops { struct list_head list; @@ -2064,9 +2059,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); - int uid; char name[TCP_ULP_NAME_MAX]; - bool user_visible; struct module *owner; }; int tcp_register_ulp(struct tcp_ulp_ops *type); -- cgit v1.2.3 From 81fa7a69c2174ed8de314b9c231ef30a8718e5e1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 19 Oct 2018 11:19:13 +0200 Subject: dt-bindings: phy: Update SERDES_MAX to be SERDES_MAX + 1 SERDES_MAX is a valid value to index ctrl->phys in drivers/phy/mscc/phy-ocelot-serdes.c. But, currently, there is an out-of-bounds bug in the mentioned driver when reading from ctrl->phys, because the size of array ctrl->phys is SERDES_MAX. Partially fix this by updating SERDES_MAX to be SERDES6G_MAX + 1. Notice that this is the first part of the solution to the out-of-bounds bug mentioned above. Although this change is not dependent on any other one. Suggested-by: Quentin Schulz Reviewed-by: Quentin Schulz Acked-by: Rob Herring Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/dt-bindings/phy/phy-ocelot-serdes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/phy/phy-ocelot-serdes.h b/include/dt-bindings/phy/phy-ocelot-serdes.h index bd28f21206f6..fe70adaca68f 100644 --- a/include/dt-bindings/phy/phy-ocelot-serdes.h +++ b/include/dt-bindings/phy/phy-ocelot-serdes.h @@ -7,6 +7,6 @@ #define SERDES1G_MAX SERDES1G(5) #define SERDES6G(x) (SERDES1G_MAX + 1 + (x)) #define SERDES6G_MAX SERDES6G(2) -#define SERDES_MAX SERDES6G_MAX +#define SERDES_MAX (SERDES6G_MAX + 1) #endif -- cgit v1.2.3 From 876dcf2f3aaa0f68d437b368b93a4c4b81521191 Mon Sep 17 00:00:00 2001 From: Olivier Brunel Date: Sat, 20 Oct 2018 19:39:56 +0200 Subject: umh: Add command line to user mode helpers User mode helpers were spawned without a command line, and because an empty command line is used by many tools to identify processes as kernel threads, this could cause some issues. Notably during killing spree on shutdown, since such helper would then be skipped (i.e. not killed) which would result in the process remaining alive, and thus preventing unmouting of the rootfs (as experienced with the bpfilter umh). Fixes: 449325b52b7a ("umh: introduce fork_usermode_blob() helper") Signed-off-by: Olivier Brunel Signed-off-by: David S. Miller --- include/linux/umh.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/umh.h b/include/linux/umh.h index 5c812acbb80a..235f51b62c71 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -44,6 +44,7 @@ struct subprocess_info *call_usermodehelper_setup_file(struct file *file, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); struct umh_info { + const char *cmdline; struct file *pipe_to_umh; struct file *pipe_from_umh; pid_t pid; -- cgit v1.2.3 From 424c22fb62427362f1f660cd83fbdc41aec488b6 Mon Sep 17 00:00:00 2001 From: Vito Caputo Date: Sun, 21 Oct 2018 04:33:03 -0700 Subject: af_unix.h: trivial whitespace cleanup Replace spurious spaces with a tab and remove superfluous tab from unix_sock struct. Signed-off-by: Vito Caputo Signed-off-by: David S. Miller --- include/net/af_unix.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a5ba41b3b867..e2695c4bf358 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -52,7 +52,7 @@ struct unix_skb_parms { struct unix_sock { /* WARNING: sk has to be the first member */ struct sock sk; - struct unix_address *addr; + struct unix_address *addr; struct path path; struct mutex iolock, bindlock; struct sock *peer; @@ -63,7 +63,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; - wait_queue_entry_t peer_wake; + wait_queue_entry_t peer_wake; }; static inline struct unix_sock *unix_sk(const struct sock *sk) -- cgit v1.2.3 From ba59d5705825fb9cab3ff092552802f4fefc3635 Mon Sep 17 00:00:00 2001 From: Mathias Thore Date: Mon, 22 Oct 2018 14:55:50 +0200 Subject: net/wan/fsl_ucc_hdlc: error counters Extract error information from rx and tx buffer descriptors, and update error counters. Signed-off-by: Mathias Thore Signed-off-by: David S. Miller --- include/soc/fsl/qe/ucc_fast.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index 3ee9e7c1a7d7..dcd6b865b590 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -41,8 +41,12 @@ #define R_L_S 0x0800 /* last */ #define R_F_S 0x0400 /* first */ #define R_CM_S 0x0200 /* continuous mode */ +#define R_LG_S 0x0020 /* frame length */ +#define R_NO_S 0x0010 /* nonoctet */ +#define R_AB_S 0x0008 /* abort */ #define R_CR_S 0x0004 /* crc */ -#define R_OV_S 0x0002 /* crc */ +#define R_OV_S 0x0002 /* overrun */ +#define R_CD_S 0x0001 /* carrier detect */ /* transmit BD's status */ #define T_R_S 0x8000 /* ready bit */ @@ -51,6 +55,8 @@ #define T_L_S 0x0800 /* last */ #define T_TC_S 0x0400 /* crc */ #define T_TM_S 0x0200 /* continuous mode */ +#define T_UN_S 0x0002 /* hdlc underrun */ +#define T_CT_S 0x0001 /* hdlc carrier lost */ /* Rx Data buffer must be 4 bytes aligned in most cases */ #define UCC_FAST_RX_ALIGN 4 -- cgit v1.2.3 From 89ab066d4229acd32e323f1569833302544a4186 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 23 Oct 2018 13:40:39 +0200 Subject: Revert "net: simplify sock_poll_wait" This reverts commit dd979b4df817e9976f18fb6f9d134d6bc4a3c317. This broke tcp_poll for SMC fallback: An AF_SMC socket establishes an internal TCP socket for the initial handshake with the remote peer. Whenever the SMC connection can not be established this TCP socket is used as a fallback. All socket operations on the SMC socket are then forwarded to the TCP socket. In case of poll, the file->private_data pointer references the SMC socket because the TCP socket has no file assigned. This causes tcp_poll to wait on the wrong socket. Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- include/net/sock.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2440f8b407eb..f665d74ae509 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2059,14 +2059,20 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) /** * sock_poll_wait - place memory barrier behind the poll_wait call. * @filp: file + * @sock: socket to wait on * @p: poll_table * * See the comments in the wq_has_sleeper function. + * + * Do not derive sock from filp->private_data here. An SMC socket establishes + * an internal TCP socket that is used in the fallback case. All socket + * operations on the SMC socket are then forwarded to the TCP socket. In case of + * poll, the filp->private_data pointer references the SMC socket because the + * TCP socket has no file assigned. */ -static inline void sock_poll_wait(struct file *filp, poll_table *p) +static inline void sock_poll_wait(struct file *filp, struct socket *sock, + poll_table *p) { - struct socket *sock = filp->private_data; - if (!poll_does_not_wait(p)) { poll_wait(filp, &sock->wq->wait, p); /* We need to be sure we are in sync with the -- cgit v1.2.3 From 3f80e08f40cdb308589a49077c87632fa4508b21 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Oct 2018 11:54:16 -0700 Subject: tcp: add tcp_reset_xmit_timer() helper With EDT model, SRTT no longer is inflated by pacing delays. This means that RTO and some other xmit timers might be setup incorrectly. This is particularly visible with either : - Very small enforced pacing rates (SO_MAX_PACING_RATE) - Reduced rto (from the default 200 ms) This can lead to TCP flows aborts in the worst case, or spurious retransmits in other cases. For example, this session gets far more throughput than the requested 80kbit : $ netperf -H 127.0.0.2 -l 100 -- -q 10000 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 127.0.0.2 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 540000 262144 262144 104.00 2.66 With the fix : $ netperf -H 127.0.0.2 -l 100 -- -q 10000 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 127.0.0.2 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 540000 262144 262144 104.00 0.12 EDT allows for better control of rtx timers, since TCP has a better idea of the earliest departure time of each skb in the rtx queue. We only have to eventually add to the timer the difference of the EDT time with current time. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8a61c3e8c15d..a18914d20486 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1245,8 +1245,31 @@ static inline bool tcp_needs_internal_pacing(const struct sock *sk) return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED; } +/* Return in jiffies the delay before one skb is sent. + * If @skb is NULL, we look at EDT for next packet being sent on the socket. + */ +static inline unsigned long tcp_pacing_delay(const struct sock *sk, + const struct sk_buff *skb) +{ + s64 pacing_delay = skb ? skb->tstamp : tcp_sk(sk)->tcp_wstamp_ns; + + pacing_delay -= tcp_sk(sk)->tcp_clock_cache; + + return pacing_delay > 0 ? nsecs_to_jiffies(pacing_delay) : 0; +} + +static inline void tcp_reset_xmit_timer(struct sock *sk, + const int what, + unsigned long when, + const unsigned long max_when, + const struct sk_buff *skb) +{ + inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk, skb), + max_when); +} + /* Something is really bad, we could not queue an additional packet, - * because qdisc is full or receiver sent a 0 window. + * because qdisc is full or receiver sent a 0 window, or we are paced. * We do not want to add fuel to the fire, or abort too early, * so make sure the timer we arm now is at least 200ms in the future, * regardless of current icsk_rto value (as it could be ~2ms) @@ -1268,8 +1291,9 @@ static inline unsigned long tcp_probe0_when(const struct sock *sk, static inline void tcp_check_probe_timer(struct sock *sk) { if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - tcp_probe0_base(sk), TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + tcp_probe0_base(sk), TCP_RTO_MAX, + NULL); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) -- cgit v1.2.3