From 92ee76d4a926843d0e135aa0c2d9f57504c6876c Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 6 Dec 2019 15:31:03 +0100 Subject: trivial: mac80211: fix indentation Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20191206143103.3645-1-john@phrozen.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7d3f2ced92d1..e172d0c7bf74 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2047,7 +2047,7 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) #define IEEE80211_HE_OPERATION_ER_SU_DISABLE 0x00010000 #define IEEE80211_HE_OPERATION_6GHZ_OP_INFO 0x00020000 #define IEEE80211_HE_OPERATION_BSS_COLOR_MASK 0x3f000000 -#define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET 24 +#define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET 24 #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000 -- cgit v1.2.3 From 30b2f0be23fb40e58d0ad2caf8702c2a44cda2e1 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 13 Jan 2020 21:59:40 -0800 Subject: mac80211: add ieee80211_is_any_nullfunc() commit 08a5bdde3812 ("mac80211: consider QoS Null frames for STA_NULLFUNC_ACKED") Fixed a bug where we failed to take into account a nullfunc frame can be either non-QoS or QoS. It turns out there is at least one more bug in ieee80211_sta_tx_notify(), introduced in commit 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing"), where we forgot to check for the QoS variant and so assumed the QoS nullfunc frame never went out Fix this by adding a helper ieee80211_is_any_nullfunc() which consolidates the check for non-QoS and QoS nullfunc frames. Replace existing compound conditionals and add a couple more missing checks for QoS variant. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200114055940.18502-3-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e172d0c7bf74..1c4409b4c012 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -619,6 +619,15 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_any_nullfunc(__le16 fc) +{ + return (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)); +} + /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @fc: frame control field in little-endian byteorder -- cgit v1.2.3 From f93d6b21a93ceb02140eafd84e4fd77f5d00180a Mon Sep 17 00:00:00 2001 From: Zvika Yehudai Date: Mon, 3 Feb 2020 10:08:23 +0200 Subject: ieee80211: fix 'the' doubling in comments Remove redundant 'the' where 'the the' was written. Signed-off-by: Zvika Yehudai Link: https://lore.kernel.org/r/20200203080823.24949-1-zvikayeh@gmail.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1c4409b4c012..095a7108c394 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2062,7 +2062,7 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size - * @he_oper_ie: byte data of the He Operations IE, stating from the the byte + * @he_oper_ie: byte data of the He Operations IE, stating from the byte * after the ext ID byte. It is assumed that he_oper_ie has at least * sizeof(struct ieee80211_he_operation) bytes, the caller must have * validated this. @@ -2100,7 +2100,7 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) /* * ieee80211_he_spr_size - calculate 802.11ax HE Spatial Reuse IE size - * @he_spr_ie: byte data of the He Spatial Reuse IE, stating from the the byte + * @he_spr_ie: byte data of the He Spatial Reuse IE, stating from the byte * after the ext ID byte. It is assumed that he_spr_ie has at least * sizeof(struct ieee80211_he_spr) bytes, the caller must have validated * this @@ -2743,7 +2743,7 @@ enum ieee80211_tdls_actioncode { */ #define WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT BIT(6) -/* TDLS capabilities in the the 4th byte of @WLAN_EID_EXT_CAPABILITY */ +/* TDLS capabilities in the 4th byte of @WLAN_EID_EXT_CAPABILITY */ #define WLAN_EXT_CAPA4_TDLS_BUFFER_STA BIT(4) #define WLAN_EXT_CAPA4_TDLS_PEER_PSM BIT(5) #define WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH BIT(6) -- cgit v1.2.3 From c0058df73309906ef4d5383fbaa10c43ebddc48a Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Fri, 31 Jan 2020 13:12:57 +0200 Subject: mac80211: parse also the RSNXE IE Parse also the RSN Extension IE when parsing the rest of the IEs. It will be used in a later patch. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-21-luca@coelho.fi Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 095a7108c394..6f3e7c5c600a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2532,6 +2532,7 @@ enum ieee80211_eid { WLAN_EID_FILS_INDICATION = 240, WLAN_EID_DILS = 241, WLAN_EID_FRAGMENT = 242, + WLAN_EID_RSNX = 244, WLAN_EID_EXTENSION = 255 }; @@ -3421,4 +3422,11 @@ static inline bool for_each_element_completed(const struct element *element, return (const u8 *)element == (const u8 *)data + datalen; } +/** + * RSNX Capabilities: + * bits 0-3: Field length (n-1) + */ +#define WLAN_RSNX_CAPA_PROTECTED_TWT BIT(4) +#define WLAN_RSNX_CAPA_SAE_H2E BIT(5) + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 256db7423c31c873abe6fb780513dd7b5705a510 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Thu, 13 Feb 2020 13:16:17 +0000 Subject: ieee80211: add WPA3 OWE AKM suite selector Add the definition for Opportunistic Wireless Encryption AKM selector. Signed-off-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200213131608.10541-3-sergey.matyukevich.os@quantenna.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 6f3e7c5c600a..33d907eec0b6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3044,6 +3044,7 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) #define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) #define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) +#define WLAN_AKM_SUITE_OWE SUITE(0x000FAC, 18) #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3 From d71151a39c97d551378a441c089508b0bca48210 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 11 Feb 2020 12:52:49 +0800 Subject: ptp_qoriq: drop the code of alarm The alarm function hadn't been supported by PTP clock driver. The recommended solution PHC + phc2sys + nanosleep provides best performance. So drop the code of alarm in ptp_qoriq driver. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- include/linux/fsl/ptp_qoriq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index b0b743563f43..75884563059f 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -149,8 +149,6 @@ struct ptp_qoriq { bool extts_fifo_support; int irq; int phc_index; - u64 alarm_interval; /* for periodic alarm */ - u64 alarm_value; u32 tclk_period; /* nanoseconds */ u32 tmr_prsc; u32 tmr_add; -- cgit v1.2.3 From 8062e2333f8f7dcd5627e22b99e18d1cbb53eedb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 23:57:36 +0000 Subject: net: linkmode: make linkmode_test_bit() take const pointer linkmode_test_bit() does not modify the address; test_bit() is also declared const volatile for the same reason. There's no need for linkmode_test_bit() to be any different, and allows implementation of helpers that take a const linkmode pointer. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/linkmode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index fe740031339d..8e5b352e44f2 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -71,7 +71,7 @@ static inline void linkmode_change_bit(int nr, volatile unsigned long *addr) __change_bit(nr, addr); } -static inline int linkmode_test_bit(int nr, volatile unsigned long *addr) +static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) { return test_bit(nr, addr); } -- cgit v1.2.3 From a87ae8a963bde755b0962bcc18db83d611f63e7a Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 15:49:27 +0000 Subject: net: add helpers to resolve negotiated flow control Add a couple of helpers to resolve negotiated flow control. Two helpers are provided: - linkmode_resolve_pause() which takes the link partner and local advertisements, and decodes whether we should enable TX or RX pause at the MAC. This is useful outside of phylib, e.g. in phylink. - phy_get_pause(), which returns the TX/RX enablement status for the current negotiation results of the PHY. This allows us to centralise the flow control resolution, rather than spreading it around. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/linkmode.h | 4 ++++ include/linux/phy.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 8e5b352e44f2..9ec210f31d06 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -88,4 +88,8 @@ static inline int linkmode_subset(const unsigned long *src1, return bitmap_subset(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } +void linkmode_resolve_pause(const unsigned long *local_adv, + const unsigned long *partner_adv, + bool *tx_pause, bool *rx_pause); + #endif /* __LINKMODE_H */ diff --git a/include/linux/phy.h b/include/linux/phy.h index c570e162e05e..80f8b2158271 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1257,6 +1257,9 @@ void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); bool phy_validate_pause(struct phy_device *phydev, struct ethtool_pauseparam *pp); +void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause); +void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, + bool *tx_pause, bool *rx_pause); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From 45c767faef151899ac1a5e14a59c0e0a5bdba27b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 15:49:32 +0000 Subject: net: add linkmode helper for setting flow control advertisement Add a linkmode helper to set the flow control advertisement in an ethtool linkmode mask according to the tx/rx capabilities. This implementation is moved from phylib, and documented with an analysis of its shortcomings. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/linkmode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 9ec210f31d06..c664c27a29a0 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -92,4 +92,6 @@ void linkmode_resolve_pause(const unsigned long *local_adv, const unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); +void linkmode_set_pause(unsigned long *advertisement, bool tx, bool rx); + #endif /* __LINKMODE_H */ -- cgit v1.2.3 From 4e5aeb4157c879a021e6d92373dc7e4684ebd8c0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 15:49:53 +0000 Subject: net: phylink: resolve fixed link flow control Resolve the fixed link flow control using the recently introduced linkmode_resolve_pause() helper, which we use in phylink_get_fixed_state() only when operating in full duplex mode. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phylink.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 523209e70947..0d6073c2b2b7 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -12,12 +12,10 @@ struct net_device; enum { MLO_PAUSE_NONE, - MLO_PAUSE_ASYM = BIT(0), - MLO_PAUSE_SYM = BIT(1), - MLO_PAUSE_RX = BIT(2), - MLO_PAUSE_TX = BIT(3), + MLO_PAUSE_RX = BIT(0), + MLO_PAUSE_TX = BIT(1), MLO_PAUSE_TXRX_MASK = MLO_PAUSE_TX | MLO_PAUSE_RX, - MLO_PAUSE_AN = BIT(4), + MLO_PAUSE_AN = BIT(2), MLO_AN_PHY = 0, /* Conventional PHY */ MLO_AN_FIXED, /* Fixed-link mode */ -- cgit v1.2.3 From b70486f94bb4820e84491089da5e30d29e774b0d Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 15:50:09 +0000 Subject: net: phylink: clarify flow control settings in documentation Clarify the expected flow control settings operation in the phylink documentation for each negotiation mode. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phylink.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 0d6073c2b2b7..812357c03df4 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -152,13 +152,20 @@ void mac_pcs_get_state(struct phylink_config *config, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. * + * In all negotiation modes, as defined by @mode, @state->pause indicates the + * pause settings which should be applied as follows. If %MLO_PAUSE_AN is not + * set, %MLO_PAUSE_TX and %MLO_PAUSE_RX indicate whether the MAC should send + * pause frames and/or act on received pause frames respectively. Otherwise, + * the results of in-band negotiation/status from the MAC PCS should be used + * to control the MAC pause mode settings. + * * The action performed depends on the currently selected mode: * * %MLO_AN_FIXED, %MLO_AN_PHY: - * Configure the specified @state->speed, @state->duplex and - * @state->pause (%MLO_PAUSE_TX / %MLO_PAUSE_RX) modes over a link - * specified by @state->interface. @state->advertising may be used, - * but is not required. Other members of @state must be ignored. + * Configure the specified @state->speed and @state->duplex over a link + * specified by @state->interface. @state->advertising may be used, but + * is not required. Pause modes as above. Other members of @state must + * be ignored. * * Valid state members: interface, speed, duplex, pause, advertising. * @@ -170,11 +177,14 @@ void mac_pcs_get_state(struct phylink_config *config, * mac_pcs_get_state() callback. Changes in link state must be made * by calling phylink_mac_change(). * + * Interface mode specific details are mentioned below. + * * If in 802.3z mode, the link speed is fixed, dependent on the - * @state->interface. Duplex is negotiated, and pause is advertised - * according to @state->an_enabled, @state->pause and - * @state->advertising flags. Beware of MACs which only support full - * duplex at gigabit and higher speeds. + * @state->interface. Duplex and pause modes are negotiated via + * the in-band configuration word. Advertised pause modes are set + * according to the @state->an_enabled and @state->advertising + * flags. Beware of MACs which only support full duplex at gigabit + * and higher speeds. * * If in Cisco SGMII mode, the link speed and duplex mode are passed * in the serial bitstream 16-bit configuration word, and the MAC -- cgit v1.2.3 From d8fab4815a371e8013e1a769c31da1bcaf618b01 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 14 Feb 2020 15:30:01 +0100 Subject: net/mlx5: fix spelling mistake "reserverd" -> "reserved" The reserved member should be named reserved. Signed-off-by: Alexandre Belloni Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc_fpga.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 37e065a80a43..07d77323f78a 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -608,7 +608,7 @@ struct mlx5_ifc_tls_cmd_bits { struct mlx5_ifc_tls_resp_bits { u8 syndrome[0x20]; u8 stream_id[0x20]; - u8 reserverd[0x40]; + u8 reserved[0x40]; }; #define MLX5_TLS_COMMAND_SIZE (0x100) -- cgit v1.2.3 From 12206b17235aed1ca6390b3e516825ae276f8345 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 11 Feb 2020 14:32:43 -0800 Subject: net/mlx5: Add support for resource dump On driver load: - Initialize resource dump data structure and memory access tools (mkey & pd). - Read the resource dump's menu which contains the FW segment identifier. Each record is identified by the segment name (ASCII). During the driver's course of life, users (like reporters) may request dumps per segment. The user should create a command providing the segment identifier (SW enumeration) and command keys. In return, the user receives a command context. In order to receive the dump, the user should supply the command context and a memory (aligned to a PAGE) on which the dump content will be written. Since the dump may be larger than the given memory, the user may resubmit the command until received an indication of end-of-dump. It is the user's responsibility to destroy the command. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 277a51d3ec40..f99cbe249425 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -722,6 +722,7 @@ struct mlx5_core_dev { struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; + struct mlx5_rsc_dump *rsc_dump; u32 vsc_addr; struct mlx5_hv_vhca *hv_vhca; }; -- cgit v1.2.3 From a4393861a351f66fef1102e775743c86a276afce Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 17 Feb 2020 12:15:28 +0000 Subject: bpf, sk_msg: Let ULP restore sk_proto and write_space callback We don't need a fallback for when the socket is not using ULP. tcp_update_ulp handles this case exactly the same as we do in sk_psock_restore_proto. Get rid of the duplicated code. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200217121530.754315-2-jakub@cloudflare.com --- include/linux/skmsg.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14d61bba0b79..8605947d6c08 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -361,16 +361,7 @@ static inline void sk_psock_restore_proto(struct sock *sk, sk->sk_prot->unhash = psock->saved_unhash; if (psock->sk_proto) { - struct inet_connection_sock *icsk = inet_csk(sk); - bool has_ulp = !!icsk->icsk_ulp_data; - - if (has_ulp) { - tcp_update_ulp(sk, psock->sk_proto, - psock->saved_write_space); - } else { - sk->sk_prot = psock->sk_proto; - sk->sk_write_space = psock->saved_write_space; - } + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); psock->sk_proto = NULL; } else { sk->sk_write_space = psock->saved_write_space; -- cgit v1.2.3 From a178b4585865a4c756c41bc5376f63416b7d9271 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 17 Feb 2020 12:15:29 +0000 Subject: bpf, sk_msg: Don't clear saved sock proto on restore There is no need to clear psock->sk_proto when restoring socket protocol callbacks in sk->sk_prot. The psock is about to get detached from the sock and eventually destroyed. At worst we will restore the protocol callbacks and the write callback twice. This makes reasoning about psock state easier. Once psock is initialized, we can count on psock->sk_proto always being set. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200217121530.754315-3-jakub@cloudflare.com --- include/linux/skmsg.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8605947d6c08..d90ef61712a1 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -359,13 +359,7 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - - if (psock->sk_proto) { - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); - psock->sk_proto = NULL; - } else { - sk->sk_write_space = psock->saved_write_space; - } + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } static inline void sk_psock_set_state(struct sk_psock *psock, -- cgit v1.2.3 From 0f0d3827c0b4d6c3d219a73ea103077dc5bc17aa Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:26 +0200 Subject: net/mlx5: E-Switch, Move source port on reg_c0 to the upper 16 bits Multi chain support requires the miss path to continue the processing from the last chain id, and for that we need to save the chain miss tag (a mapping for 32bit chain id) on reg_c0 which will come in a next patch. Currently reg_c0 is exclusively used to store the source port metadata, giving it 32bit, it is created from 16bits of vcha_id, and 16bits of vport number. We will move this source port metadata to upper 16bits, and leave the lower bits for the chain miss tag. We compress the reg_c0 source port metadata to 16bits by taking 8 bits from vhca_id, and 8bits from the vport number. Since we compress the vport number to 8bits statically, and leave two top ids for special PF/ECPF numbers, we will only support a max of 254 vports with this strategy. Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 98e667b176ef..dd1333f29f6e 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -71,7 +71,26 @@ enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + +/* Reg C0 usage: + * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT BITS(8) | ESW_CHAIN_TAG(16) > + * + * Highest 8 bits of the reg c0 is the vhca_id, next 8 bits is vport_num, + * the rest (lowest 16 bits) is left for tc chain tag restoration. + * VHCA_ID + VPORT comprise the SOURCE_PORT matching. + */ +#define ESW_VHCA_ID_BITS 8 +#define ESW_VPORT_BITS 8 +#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) +#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) + +static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) +{ + return GENMASK(31, 32 - ESW_SOURCE_PORT_METADATA_BITS); +} + +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ @@ -94,11 +113,17 @@ mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) }; static inline u32 -mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, int vport_num) { return 0; }; + +static inline u32 +mlx5_eswitch_get_vport_metadata_mask(void) +{ + return 0; +} #endif /* CONFIG_MLX5_ESWITCH */ #endif -- cgit v1.2.3 From 11b717d6152699623fb1133759f9b8f235935a51 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:27 +0200 Subject: net/mlx5: E-Switch, Get reg_c0 value on CQE On RX side create a restore table in OFFLOADS namespace. This table will match on all values for reg_c0 we will use, and set it to the flow_tag. This flow tag can then be read on the CQE. As there is no copy action from reg c0 to flow tag, instead we have to set the flow tag explictily. We add an API so callers can add all the used reg_c0 values (tags) and for each of those we add a restore rule. This will be used in a following patch to save the miss chain mapping tag on reg_c0 and from it restore the tc chain on the skb. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index dd1333f29f6e..61705e74a5bb 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -84,6 +84,8 @@ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); #define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) #define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) #define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\ + 0) static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) { -- cgit v1.2.3 From b8e202d1d1d0f182f01062804efb523ea9a9008c Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:13 +0000 Subject: net, sk_msg: Annotate lockless access to sk_prot on clone sk_msg and ULP frameworks override protocol callbacks pointer in sk->sk_prot, while tcp accesses it locklessly when cloning the listening socket, that is with neither sk_lock nor sk_callback_lock held. Once we enable use of listening sockets with sockmap (and hence sk_msg), there will be shared access to sk->sk_prot if socket is getting cloned while being inserted/deleted to/from the sockmap from another CPU: Read side: tcp_v4_rcv sk = __inet_lookup_skb(...) tcp_check_req(sk) inet_csk(sk)->icsk_af_ops->syn_recv_sock tcp_v4_syn_recv_sock tcp_create_openreq_child inet_csk_clone_lock sk_clone_lock READ_ONCE(sk->sk_prot) Write side: sock_map_ops->map_update_elem sock_map_update_elem sock_map_update_common sock_map_link_no_progs tcp_bpf_init tcp_bpf_update_sk_prot sk_psock_update_proto WRITE_ONCE(sk->sk_prot, ops) sock_map_ops->map_delete_elem sock_map_delete_elem __sock_map_delete sock_map_unref sk_psock_put sk_psock_drop sk_psock_restore_proto tcp_update_ulp WRITE_ONCE(sk->sk_prot, proto) Mark the shared access with READ_ONCE/WRITE_ONCE annotations. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200218171023.844439-2-jakub@cloudflare.com --- include/linux/skmsg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index d90ef61712a1..112765bd146d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -352,7 +352,8 @@ static inline void sk_psock_update_proto(struct sock *sk, psock->saved_write_space = sk->sk_write_space; psock->sk_proto = sk->sk_prot; - sk->sk_prot = ops; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, ops); } static inline void sk_psock_restore_proto(struct sock *sk, -- cgit v1.2.3 From ce69e2162f158d9d4a0e513971d02dabc7d14cb7 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 24 Feb 2020 16:53:58 -0600 Subject: mdio_bus: Add generic mdio_find_bus() It appears most ethernet drivers follow one of two main strategies for mdio bus/phy management. A monolithic model where the net driver itself creates, probes and uses the phy, and one where an external mdio/phy driver instantiates the mdio bus/phy and the net driver only attaches to a known phy. Usually in this latter model the phys are discovered via DT relationships or simply phy name/address hardcoding. This is a shame because modern well behaved mdio buses are self describing and can be probed. The mdio layer itself is fully capable of this, yet there isn't a clean way for a standalone net driver to attach and enumerate the discovered devices. This is because outside of of_mdio_find_bus() there isn't a straightforward way to acquire the mii_bus pointer. So, lets add a mdio_find_bus which can return the mii_bus based only on its name. Signed-off-by: Jeremy Linton Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 80f8b2158271..e72dbd0d2d6a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -289,6 +289,7 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) return devm_mdiobus_alloc_size(dev, 0); } +struct mii_bus *mdio_find_bus(const char *mdio_name); void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); -- cgit v1.2.3 From 3c58482a382bae89410439247152eb342e9872f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Feb 2020 15:01:41 +0100 Subject: bpf: Provide bpf_prog_run_pin_on_cpu() helper BPF programs require to run on one CPU to completion as they use per CPU storage, but according to Alexei they don't need reentrancy protection as obviously BPF programs running in thread context can always be 'preempted' by hard and soft interrupts and instrumentation and the same program can run concurrently on a different CPU. The currently used mechanism to ensure CPUness is to wrap the invocation into a preempt_disable/enable() pair. Disabling preemption is also disabling migration for a task. preempt_disable/enable() is used because there is no explicit way to reliably disable only migration. Provide a separate macro to invoke a BPF program which can be used in migrateable task context. It wraps BPF_PROG_RUN() in a migrate_disable/enable() pair which maps on non RT enabled kernels to preempt_disable/enable(). On RT enabled kernels this merely disables migration. Both methods ensure that the invoked BPF program runs on one CPU to completion. Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.474592620@linutronix.de --- include/linux/filter.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index f349e2c0884c..38f60188bb26 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); } \ ret; }) -#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \ - bpf_dispatcher_nopfunc) +#define BPF_PROG_RUN(prog, ctx) \ + __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc) + +/* + * Use in preemptible and therefore migratable context to make sure that + * the execution of the BPF program runs on one CPU. + * + * This uses migrate_disable/enable() explicitly to document that the + * invocation of a BPF program does not require reentrancy protection + * against a BPF program which is invoked from a preempting task. + * + * For non RT enabled kernels migrate_disable/enable() maps to + * preempt_disable/enable(), i.e. it disables also preemption. + */ +static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, + const void *ctx) +{ + u32 ret; + + migrate_disable(); + ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc); + migrate_enable(); + return ret; +} #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN -- cgit v1.2.3 From 37e1d9202225635772b32e340294208367279c2b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Feb 2020 15:01:42 +0100 Subject: bpf: Replace cant_sleep() with cant_migrate() As already discussed in the previous change which introduced BPF_RUN_PROG_PIN_ON_CPU() BPF only requires to disable migration to guarantee per CPUness. If RT substitutes the preempt disable based migration protection then the cant_sleep() check will obviously trigger as preemption is not disabled. Replace it by cant_migrate() which maps to cant_sleep() on a non RT kernel and will verify that migration is disabled on a full RT kernel. Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.583038889@linutronix.de --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 38f60188bb26..1982a52eb4c9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ u32 ret; \ - cant_sleep(); \ + cant_migrate(); \ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ struct bpf_prog_stats *stats; \ u64 start = sched_clock(); \ -- cgit v1.2.3 From 3d9f773cf2876c01a505b9fe27270901d464e90a Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 24 Feb 2020 15:01:43 +0100 Subject: bpf: Use bpf_prog_run_pin_on_cpu() at simple call sites. All of these cases are strictly of the form: preempt_disable(); BPF_PROG_RUN(...); preempt_enable(); Replace this with bpf_prog_run_pin_on_cpu() which wraps BPF_PROG_RUN() with: migrate_disable(); BPF_PROG_RUN(...); migrate_enable(); On non RT enabled kernels this maps to preempt_disable/enable() and on RT enabled kernels this solely prevents migration, which is sufficient as there is no requirement to prevent reentrancy to any BPF program from a preempting task. The only requirement is that the program stays on the same CPU. Therefore, this is a trivially correct transformation. The seccomp loop does not need protection over the loop. It only needs protection per BPF filter program [ tglx: Converted to bpf_prog_run_pin_on_cpu() ] Signed-off-by: David S. Miller Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.691493094@linutronix.de --- include/linux/filter.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1982a52eb4c9..9270de2a0df8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -717,9 +717,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); - preempt_disable(); - res = BPF_PROG_RUN(prog, skb); - preempt_enable(); + res = bpf_prog_run_pin_on_cpu(prog, skb); return res; } -- cgit v1.2.3 From 2a916f2f546ca1c1e3323e2a4269307f6d9890eb Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 24 Feb 2020 15:01:46 +0100 Subject: bpf: Use migrate_disable/enable in array macros and cgroup/lirc code. Replace the preemption disable/enable with migrate_disable/enable() to reflect the actual requirement and to allow PREEMPT_RT to substitute it with an actual migration disable mechanism which does not disable preemption. Including the code paths that go via __bpf_prog_run_save_cb(). Signed-off-by: David S. Miller Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.998293311@linutronix.de --- include/linux/bpf.h | 8 ++++---- include/linux/filter.h | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 49b1a70e12c8..76b3a0eb1502 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ u32 _ret = 1; \ - preempt_disable(); \ + migrate_disable(); \ rcu_read_lock(); \ _array = rcu_dereference(array); \ if (unlikely(check_non_null && !_array))\ @@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, } \ _out: \ rcu_read_unlock(); \ - preempt_enable(); \ + migrate_enable(); \ _ret; \ }) @@ -932,7 +932,7 @@ _out: \ u32 ret; \ u32 _ret = 1; \ u32 _cn = 0; \ - preempt_disable(); \ + migrate_disable(); \ rcu_read_lock(); \ _array = rcu_dereference(array); \ _item = &_array->items[0]; \ @@ -944,7 +944,7 @@ _out: \ _item++; \ } \ rcu_read_unlock(); \ - preempt_enable(); \ + migrate_enable(); \ if (_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ else \ diff --git a/include/linux/filter.h b/include/linux/filter.h index 9270de2a0df8..43b5e455d2f5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -677,6 +677,7 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb) return qdisc_skb_cb(skb)->data; } +/* Must be invoked with migration disabled */ static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, struct sk_buff *skb) { @@ -702,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, { u32 res; - preempt_disable(); + migrate_disable(); res = __bpf_prog_run_save_cb(prog, skb); - preempt_enable(); + migrate_enable(); return res; } -- cgit v1.2.3 From c518cfa0c5ad75ddf3d743f1e35b9cf5fc2c346e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Feb 2020 15:01:47 +0100 Subject: bpf: Provide recursion prevention helpers The places which need to prevent the execution of trace type BPF programs to prevent deadlocks on the hash bucket lock do this open coded. Provide two inline functions, bpf_disable/enable_instrumentation() to replace these open coded protection constructs. Use migrate_disable/enable() instead of preempt_disable/enable() right away so this works on RT enabled kernels. On a !RT kernel migrate_disable / enable() are mapped to preempt_disable/enable(). These helpers use this_cpu_inc/dec() instead of __this_cpu_inc/dec() on an RT enabled kernel because migrate disabled regions are preemptible and preemption might hit in the middle of a RMW operation which can lead to inconsistent state. Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145644.103910133@linutronix.de --- include/linux/bpf.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 76b3a0eb1502..1acd5bf70350 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -961,6 +961,36 @@ _out: \ #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +/* + * Block execution of BPF programs attached to instrumentation (perf, + * kprobes, tracepoints) to prevent deadlocks on map operations as any of + * these events can happen inside a region which holds a map bucket lock + * and can deadlock on it. + * + * Use the preemption safe inc/dec variants on RT because migrate disable + * is preemptible on RT and preemption in the middle of the RMW operation + * might lead to inconsistent state. Use the raw variants for non RT + * kernels as migrate_disable() maps to preempt_disable() so the slightly + * more expensive save operation can be avoided. + */ +static inline void bpf_disable_instrumentation(void) +{ + migrate_disable(); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_inc(bpf_prog_active); + else + __this_cpu_inc(bpf_prog_active); +} + +static inline void bpf_enable_instrumentation(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_dec(bpf_prog_active); + else + __this_cpu_dec(bpf_prog_active); + migrate_enable(); +} + extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; -- cgit v1.2.3 From f70ce185687bbe4e2d7ff126a8c890631f5fc2af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:11 +0100 Subject: sysfs: add sysfs_file_change_owner() Add helpers to change the owner of a sysfs files. This function will be used to correctly account for kobject ownership changes, e.g. when moving network devices between network namespaces. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/sysfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fa7ee503fb76..a7884024a911 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -310,6 +310,9 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) return kernfs_enable_ns(kn); } +int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, + kgid_t kgid); + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) @@ -522,6 +525,13 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) { } +static inline int sysfs_file_change_owner(struct kobject *kobj, + const char *name, kuid_t kuid, + kgid_t kgid) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 0666a3aee762cd4f7981c2eed0fd8cab87533539 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:12 +0100 Subject: sysfs: add sysfs_link_change_owner() Add a helper to change the owner of a sysfs link. This function will be used to correctly account for kobject ownership changes, e.g. when moving network devices between network namespaces. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/sysfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index a7884024a911..7e15ebfd750e 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -312,6 +312,8 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid); +int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, + const char *name, kuid_t kuid, kgid_t kgid); #else /* CONFIG_SYSFS */ @@ -532,6 +534,14 @@ static inline int sysfs_file_change_owner(struct kobject *kobj, return 0; } +static inline int sysfs_link_change_owner(struct kobject *kobj, + struct kobject *targ, + const char *name, kuid_t kuid, + kgid_t kgid) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 303a42769c4c4d8e5e3ad928df87eb36f8c1fa60 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:13 +0100 Subject: sysfs: add sysfs_group{s}_change_owner() Add helpers to change the owner of sysfs groups. This function will be used to correctly account for kobject ownership changes, e.g. when moving network devices between network namespaces. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/sysfs.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 7e15ebfd750e..3fcaabdb05ef 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -314,6 +314,12 @@ int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid); +int sysfs_groups_change_owner(struct kobject *kobj, + const struct attribute_group **groups, + kuid_t kuid, kgid_t kgid); +int sysfs_group_change_owner(struct kobject *kobj, + const struct attribute_group *groups, kuid_t kuid, + kgid_t kgid); #else /* CONFIG_SYSFS */ @@ -542,6 +548,20 @@ static inline int sysfs_link_change_owner(struct kobject *kobj, return 0; } +static inline int sysfs_groups_change_owner(struct kobject *kobj, + const struct attribute_group **groups, + kuid_t kuid, kgid_t kgid) +{ + return 0; +} + +static inline int sysfs_group_change_owner(struct kobject *kobj, + const struct attribute_group **groups, + kuid_t kuid, kgid_t kgid) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 2c4f9401ceb00167a3bfd322a28aa87b646a253f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:14 +0100 Subject: sysfs: add sysfs_change_owner() Add a helper to change the owner of sysfs objects. This function will be used to correctly account for kobject ownership changes, e.g. when moving network devices between network namespaces. This mirrors how a kobject is added through driver core which in its guts is done via kobject_add_internal() which in summary creates the main directory via create_dir(), populates that directory with the groups associated with the ktype of the kobject (if any) and populates the directory with the basic attributes associated with the ktype of the kobject (if any). These are the basic steps that are associated with adding a kobject in sysfs. Any additional properties are added by the specific subsystem itself (not by driver core) after it has registered the device. So for the example of network devices, a network device will e.g. register a queue subdirectory under the basic sysfs directory for the network device and than further subdirectories within that queues subdirectory. But that is all specific to network devices and they call the corresponding sysfs functions to do that directly when they create those queue objects. So anything that a subsystem adds outside of what driver core does must also be changed by it (That's already true for removal of files it created outside of driver core.) and it's the same for ownership changes. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/sysfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 3fcaabdb05ef..9e531ec76274 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -312,6 +312,7 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid); +int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid); int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_groups_change_owner(struct kobject *kobj, @@ -548,6 +549,11 @@ static inline int sysfs_link_change_owner(struct kobject *kobj, return 0; } +static inline int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid) +{ + return 0; +} + static inline int sysfs_groups_change_owner(struct kobject *kobj, const struct attribute_group **groups, kuid_t kuid, kgid_t kgid) -- cgit v1.2.3 From b8f33e5d76a7a1b87e0cc760d05bf2477b4e91d6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:15 +0100 Subject: device: add device_change_owner() Add a helper to change the owner of a device's sysfs entries. This needs to happen when the ownership of a device is changed, e.g. when moving network devices between network namespaces. This function will be used to correctly account for ownership changes, e.g. when moving network devices between network namespaces. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0cd7c647c16c..3e40533d2037 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -817,6 +817,7 @@ extern struct device *device_find_child_by_name(struct device *parent, extern int device_rename(struct device *dev, const char *new_name); extern int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); +extern int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); extern const char *device_get_devnode(struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp); -- cgit v1.2.3 From 91a208f2185ad4855ff03c342d0b7e4f5fc6f5df Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Feb 2020 10:23:41 +0000 Subject: net: phylink: propagate resolved link config via mac_link_up() Propagate the resolved link parameters via the mac_link_up() call for MACs that do not automatically track their PCS state. We propagate the link parameters via function arguments so that inappropriate members of struct phylink_link_state can't be accessed, and creating a new structure just for this adds needless complexity to the API. Tested-by: Andre Przywara Tested-by: Alexandre Belloni Tested-by: Vladimir Oltean Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 57 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 812357c03df4..2180eb1aa254 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -91,9 +91,10 @@ struct phylink_mac_ops { void (*mac_an_restart)(struct phylink_config *config); void (*mac_link_down)(struct phylink_config *config, unsigned int mode, phy_interface_t interface); - void (*mac_link_up)(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, - struct phy_device *phy); + void (*mac_link_up)(struct phylink_config *config, + struct phy_device *phy, unsigned int mode, + phy_interface_t interface, int speed, int duplex, + bool tx_pause, bool rx_pause); }; #if 0 /* For kernel-doc purposes only. */ @@ -152,6 +153,9 @@ void mac_pcs_get_state(struct phylink_config *config, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. * + * (this requires a rewrite - please refer to mac_link_up() for situations + * where the PCS and MAC are not tightly integrated.) + * * In all negotiation modes, as defined by @mode, @state->pause indicates the * pause settings which should be applied as follows. If %MLO_PAUSE_AN is not * set, %MLO_PAUSE_TX and %MLO_PAUSE_RX indicate whether the MAC should send @@ -162,12 +166,20 @@ void mac_pcs_get_state(struct phylink_config *config, * The action performed depends on the currently selected mode: * * %MLO_AN_FIXED, %MLO_AN_PHY: - * Configure the specified @state->speed and @state->duplex over a link - * specified by @state->interface. @state->advertising may be used, but - * is not required. Pause modes as above. Other members of @state must - * be ignored. + * Configure for non-inband negotiation mode, where the link settings + * are completely communicated via mac_link_up(). The physical link + * protocol from the MAC is specified by @state->interface. + * + * @state->advertising may be used, but is not required. + * + * Older drivers (prior to the mac_link_up() change) may use @state->speed, + * @state->duplex and @state->pause to configure the MAC, but this is + * deprecated; such drivers should be converted to use mac_link_up(). * - * Valid state members: interface, speed, duplex, pause, advertising. + * Other members of @state must be ignored. + * + * Valid state members: interface, advertising. + * Deprecated state members: speed, duplex, pause. * * %MLO_AN_INBAND: * place the link in an inband negotiation mode (such as 802.3z @@ -228,19 +240,34 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, /** * mac_link_up() - allow the link to come up * @config: a pointer to a &struct phylink_config. + * @phy: any attached phy * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode - * @phy: any attached phy + * @speed: link speed + * @duplex: link duplex + * @tx_pause: link transmit pause enablement status + * @rx_pause: link receive pause enablement status * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), allow the link to come up. If @phy - * is non-%NULL, configure Energy Efficient Ethernet by calling + * Configure the MAC for an established link. + * + * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link + * settings, and should be used to configure the MAC block appropriately + * where these settings are not automatically conveyed from the PCS block, + * or if in-band negotiation (as defined by phylink_autoneg_inband(@mode)) + * is disabled. + * + * Note that when 802.3z in-band negotiation is in use, it is possible + * that the user wishes to override the pause settings, and this should + * be allowed when considering the implementation of this method. + * + * If in-band negotiation mode is disabled, allow the link to come up. If + * @phy is non-%NULL, configure Energy Efficient Ethernet by calling * phy_init_eee() and perform appropriate MAC configuration for EEE. * Interface type selection must be done in mac_config(). */ -void mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, - struct phy_device *phy); +void mac_link_up(struct phylink_config *config, struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, bool rx_pause); #endif struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, -- cgit v1.2.3 From d7f10df86202273155a9d8f8553bc2ad28e0dd46 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Feb 2020 18:17:44 -0600 Subject: bpf: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200227001744.GA3317@embeddedor --- include/linux/bpf-cgroup.h | 2 +- include/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a11d5b7dbbf3..a7cd5c7a2509 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -36,7 +36,7 @@ struct bpf_cgroup_storage_map; struct bpf_storage_buffer { struct rcu_head rcu; - char data[0]; + char data[]; }; struct bpf_cgroup_storage { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1acd5bf70350..9aa33b8f3d55 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -859,7 +859,7 @@ struct bpf_prog_array_item { struct bpf_prog_array { struct rcu_head rcu; - struct bpf_prog_array_item items[0]; + struct bpf_prog_array_item items[]; }; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); -- cgit v1.2.3 From 96e326878fa5e2727d14e9a23644119374619010 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2020 17:30:41 +0200 Subject: net/mlx5e: Eswitch, Use per vport tables for mirroring When using port mirroring, we forward the traffic to another table and use that table to forward to the mirrored vport. Since the hardware loses the values of reg c, and in particular reg c0, we fail the match on the input vport which previously existed in reg c0. To overcome this situation, we use a set of per vport tables, positioned at the lowest priority, and forward traffic to those tables. Since these tables are per vport, we can avoid matching on reg c0. Fixes: c01cfd0f1115 ("net/mlx5: E-Switch, Add match on vport metadata for rule in fast path") Signed-off-by: Eli Cohen Reviewed-by: Mark Bloch Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4cae16016b2b..a5cf5c76f348 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -84,6 +84,7 @@ enum { FDB_TC_OFFLOAD, FDB_FT_OFFLOAD, FDB_SLOW_PATH, + FDB_PER_VPORT, }; struct mlx5_pkt_reformat; -- cgit v1.2.3 From 5682d393b40e1fe7426a7b8c3471f05262f42010 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:09 -0800 Subject: inet_diag: Refactor inet_sk_diag_fill(), dump(), and dump_one() In a latter patch, there is a need to update "cb->min_dump_alloc" in inet_sk_diag_fill() as it learns the diffierent bpf_sk_storages stored in a sk while dumping all sk(s) (e.g. tcp_hashinfo). The inet_sk_diag_fill() currently does not take the "cb" as an argument. One of the reason is inet_sk_diag_fill() is used by both dump_one() and dump() (which belong to the "struct inet_diag_handler". The dump_one() interface does not pass the "cb" along. This patch is to make dump_one() pass a "cb". The "cb" is created in inet_diag_cmd_exact(). The "nlh" and "in_skb" are stored in "cb" as the dump() interface does. The total number of args in inet_sk_diag_fill() is also cut from 10 to 7 and that helps many callers to pass fewer args. In particular, "struct user_namespace *user_ns", "u32 pid", and "u32 seq" can be replaced by accessing "cb->nlh" and "cb->skb". A similar argument reduction is also made to inet_twsk_diag_fill() and inet_req_diag_fill(). inet_csk_diag_dump() and inet_csk_diag_fill() are also removed. They are mostly equivalent to inet_sk_diag_fill(). Their repeated usages are very limited. Thus, inet_sk_diag_fill() is directly used in those occasions. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230409.1975173-1-kafai@fb.com --- include/linux/inet_diag.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1..6b157ce07d74 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -18,8 +18,7 @@ struct inet_diag_handler { const struct inet_diag_req_v2 *r, struct nlattr *bc); - int (*dump_one)(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, + int (*dump_one)(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); void (*idiag_get_info)(struct sock *sk, @@ -42,16 +41,15 @@ struct inet_diag_handler { struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin); + struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + u16 nlmsg_flags, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct netlink_callback *cb, const struct inet_diag_req_v2 *req); struct sock *inet_diag_find_one_icsk(struct net *net, -- cgit v1.2.3 From 0df6d32842b9a5f97a29ea90c8adc5cfac38341d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:15 -0800 Subject: inet_diag: Move the INET_DIAG_REQ_BYTECODE nlattr to cb->data The INET_DIAG_REQ_BYTECODE nlattr is currently re-found every time when the "dump()" is re-started. In a latter patch, it will also need to parse the new INET_DIAG_REQ_SK_BPF_STORAGES nlattr to learn the map_fds. Thus, this patch takes this chance to store the parsed nlattr in cb->data during the "start" time of a dump. By doing this, the "bc" argument also becomes unnecessary and is removed. Also, the two copies of the INET_DIAG_REQ_BYTECODE parsing-audit logic between compat/current version can be consolidated to one. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230415.1975555-1-kafai@fb.com --- include/linux/inet_diag.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 6b157ce07d74..1bb94cac265f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -15,8 +15,7 @@ struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc); + const struct inet_diag_req_v2 *r); int (*dump_one)(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); @@ -39,6 +38,11 @@ struct inet_diag_handler { __u16 idiag_info_size; }; +struct inet_diag_dump_data { + struct nlattr *req_nlas[__INET_DIAG_REQ_MAX]; +#define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE] +}; + struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, @@ -46,8 +50,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, u16 nlmsg_flags, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc); + const struct inet_diag_req_v2 *r); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct netlink_callback *cb, const struct inet_diag_req_v2 *req); -- cgit v1.2.3 From 1ed4d92458a969e71e7914550b6f0c730c14d84e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:21 -0800 Subject: bpf: INET_DIAG support in bpf_sk_storage This patch adds INET_DIAG support to bpf_sk_storage. 1. Although this series adds bpf_sk_storage diag capability to inet sk, bpf_sk_storage is in general applicable to all fullsock. Hence, the bpf_sk_storage logic will operate on SK_DIAG_* nlattr. The caller will pass in its specific nesting nlattr (e.g. INET_DIAG_*) as the argument. 2. The request will be like: INET_DIAG_REQ_SK_BPF_STORAGES (nla_nest) (defined in latter patch) SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32) SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32) ...... Considering there could have multiple bpf_sk_storages in a sk, instead of reusing INET_DIAG_INFO ("ss -i"), the user can select some specific bpf_sk_storage to dump by specifying an array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD. If no SK_DIAG_BPF_STORAGE_REQ_MAP_FD is specified (i.e. an empty INET_DIAG_REQ_SK_BPF_STORAGES), it will dump all bpf_sk_storages of a sk. 3. The reply will be like: INET_DIAG_BPF_SK_STORAGES (nla_nest) (defined in latter patch) SK_DIAG_BPF_STORAGE (nla_nest) SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) SK_DIAG_BPF_STORAGE (nla_nest) SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) ...... 4. Unlike other INET_DIAG info of a sk which is pretty static, the size required to dump the bpf_sk_storage(s) of a sk is dynamic as the system adding more bpf_sk_storage_map. It is hard to set a static min_dump_alloc size. Hence, this series learns it at the runtime and adjust the cb->min_dump_alloc as it iterates all sk(s) of a system. The "unsigned int *res_diag_size" in bpf_sk_storage_diag_put() is for this purpose. The next patch will update the cb->min_dump_alloc as it iterates the sk(s). Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230421.1975729-1-kafai@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9aa33b8f3d55..6015a4daf118 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1023,6 +1023,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map); -- cgit v1.2.3 From 085c20cacf2b72991ce1c9d99a5e2f1d9e73bb68 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:27 -0800 Subject: bpf: inet_diag: Dump bpf_sk_storages in inet_diag_dump() This patch will dump out the bpf_sk_storages of a sk if the request has the INET_DIAG_REQ_SK_BPF_STORAGES nlattr. An array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD can be specified in INET_DIAG_REQ_SK_BPF_STORAGES to select which bpf_sk_storage to dump. If no map_fd is specified, all bpf_sk_storages of a sk will be dumped. bpf_sk_storages can be added to the system at runtime. It is difficult to find a proper static value for cb->min_dump_alloc. This patch learns the nlattr size required to dump the bpf_sk_storages of a sk. If it happens to be the very first nlmsg of a dump and it cannot fit the needed bpf_sk_storages, it will try to expand the skb by "pskb_expand_head()". Instead of expanding it in inet_sk_diag_fill(), it is expanded at a sleepable context in __inet_diag_dump() so __GFP_DIRECT_RECLAIM can be used. In __inet_diag_dump(), it will retry as long as the skb is empty and the cb->min_dump_alloc becomes larger than before. cb->min_dump_alloc is bounded by KMALLOC_MAX_SIZE. The min_dump_alloc is also changed from 'u16' to 'u32' to accommodate a sk that may have a few large bpf_sk_storages. The updated cb->min_dump_alloc will also be used to allocate the skb in the next dump. This logic already exists in netlink_dump(). Here is the sample output of a locally modified 'ss' and it could be made more readable by using BTF later: [root@arch-fb-vm1 ~]# ss --bpf-map-id 14 --bpf-map-id 13 -t6an 'dst [::1]:8989' State Recv-Q Send-Q Local Address:Port Peer Address:PortProcess ESTAB 0 0 [::1]:51072 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] ESTAB 0 0 [::1]:51070 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] [root@arch-fb-vm1 ~]# ~/devshare/github/iproute2/misc/ss --bpf-maps -t6an 'dst [::1]:8989' State Recv-Q Send-Q Local Address:Port Peer Address:Port Process ESTAB 0 0 [::1]:51072 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ] ESTAB 0 0 [::1]:51070 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ] Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230427.1976129-1-kafai@fb.com --- include/linux/inet_diag.h | 4 ++++ include/linux/netlink.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 1bb94cac265f..e4ba25d63913 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -38,9 +38,13 @@ struct inet_diag_handler { __u16 idiag_info_size; }; +struct bpf_sk_storage_diag; struct inet_diag_dump_data { struct nlattr *req_nlas[__INET_DIAG_REQ_MAX]; #define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE] +#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES] + + struct bpf_sk_storage_diag *bpf_stg_diag; }; struct inet_connection_sock; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 205fa7b1f07a..788969ccbbde 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -188,10 +188,10 @@ struct netlink_callback { struct module *module; struct netlink_ext_ack *extack; u16 family; - u16 min_dump_alloc; - bool strict_check; u16 answer_flags; + u32 min_dump_alloc; unsigned int prev_seq, seq; + bool strict_check; union { u8 ctx[48]; -- cgit v1.2.3 From 8402a31dd803e091fd2ec9cd22040b34a0b07085 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 07:33:37 -0600 Subject: net: dccp: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6b64b6cc2175..07e547c02fd8 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -198,7 +198,7 @@ enum dccp_role { struct dccp_service_list { __u32 dccpsl_nr; - __be32 dccpsl_list[0]; + __be32 dccpsl_list[]; }; #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) -- cgit v1.2.3 From e427cad6eee47e2daf207cd7a4156ae72496ee07 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 28 Feb 2020 14:45:22 +0100 Subject: net: datagram: drop 'destructor' argument from several helpers The only users for such argument are the UDP protocol and the UNIX socket family. We can safely reclaim the accounted memory directly from the UDP code and, after the previous patch, we can do scm stats accounting outside the datagram helpers. Overall this cleans up a bit some datagram-related helpers, and avoids an indirect call per packet in the UDP receive path. v1 -> v2: - call scm_stat_del() only when not peeking - Kirill - fix build issue with CONFIG_INET_ESPINTCP Signed-off-by: Paolo Abeni Reviewed-by: Kirill Tkhai Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5b50278c4bc8..21749b2cdc9b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3514,23 +3514,15 @@ int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff_head *queue, - unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *off, int *err, + unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, - unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *off, int *err); + unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, -- cgit v1.2.3 From 70ae1e127b486704c62d3537d69ca65c446d4d83 Mon Sep 17 00:00:00 2001 From: Cris Forno Date: Fri, 28 Feb 2020 14:12:04 -0600 Subject: ethtool: Factored out similar ethtool link settings for virtual devices to core Three virtual devices (ibmveth, virtio_net, and netvsc) all have similar code to set link settings and validate ethtool command. To eliminate duplication of code, it is factored out into core/ethtool.c. Signed-off-by: Cris Forno Signed-off-by: David S. Miller --- include/linux/ethtool.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 95991e4300bf..23373978cb3c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -420,4 +420,10 @@ struct ethtool_rx_flow_rule * ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input); void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *rule); +bool ethtool_virtdev_validate_cmd(const struct ethtool_link_ksettings *cmd); +int ethtool_virtdev_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd, + u32 *dev_speed, u8 *dev_duplex); + + #endif /* _LINUX_ETHTOOL_H */ -- cgit v1.2.3 From bb4cf02d4c74f0db60f58c406ddfdfed16d14f84 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 2 Mar 2020 05:59:33 -0600 Subject: netdevice: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6c3f7032e8d9..b6fedd54cd8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -664,7 +664,7 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node struct rps_map { unsigned int len; struct rcu_head rcu; - u16 cpus[0]; + u16 cpus[]; }; #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) @@ -686,7 +686,7 @@ struct rps_dev_flow { struct rps_dev_flow_table { unsigned int mask; struct rcu_head rcu; - struct rps_dev_flow flows[0]; + struct rps_dev_flow flows[]; }; #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ ((_num) * sizeof(struct rps_dev_flow))) @@ -704,7 +704,7 @@ struct rps_dev_flow_table { struct rps_sock_flow_table { u32 mask; - u32 ents[0] ____cacheline_aligned_in_smp; + u32 ents[] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) @@ -767,7 +767,7 @@ struct xps_map { unsigned int len; unsigned int alloc_len; struct rcu_head rcu; - u16 queues[0]; + u16 queues[]; }; #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) #define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \ @@ -778,7 +778,7 @@ struct xps_map { */ struct xps_dev_maps { struct rcu_head rcu; - struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */ + struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ }; #define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ -- cgit v1.2.3 From 70ed506c3bbcfa846d4636b23051ca79fa4781f7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 2 Mar 2020 20:31:57 -0800 Subject: bpf: Introduce pinnable bpf_link abstraction Introduce bpf_link abstraction, representing an attachment of BPF program to a BPF hook point (e.g., tracepoint, perf event, etc). bpf_link encapsulates ownership of attached BPF program, reference counting of a link itself, when reference from multiple anonymous inodes, as well as ensures that release callback will be called from a process context, so that users can safely take mutex locks and sleep. Additionally, with a new abstraction it's now possible to generalize pinning of a link object in BPF FS, allowing to explicitly prevent BPF program detachment on process exit by pinning it in a BPF FS and let it open from independent other process to keep working with it. Convert two existing bpf_link-like objects (raw tracepoint and tracing BPF program attachments) into utilizing bpf_link framework, making them pinnable in BPF FS. More FD-based bpf_links will be added in follow up patches. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303043159.323675-2-andriin@fb.com --- include/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6015a4daf118..f13c78c6f29d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1056,6 +1056,19 @@ extern int sysctl_unprivileged_bpf_disabled; int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); +struct bpf_link; + +struct bpf_link_ops { + void (*release)(struct bpf_link *link); +}; + +void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, + struct bpf_prog *prog); +void bpf_link_inc(struct bpf_link *link); +void bpf_link_put(struct bpf_link *link); +int bpf_link_new_fd(struct bpf_link *link); +struct bpf_link *bpf_link_get_from_fd(u32 ufd); + int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); -- cgit v1.2.3 From 88fd9e5352fe05f7fe57778293aebd4cd106960b Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:47 +0100 Subject: bpf: Refactor trampoline update code As we need to introduce a third type of attachment for trampolines, the flattened signature of arch_prepare_bpf_trampoline gets even more complicated. Refactor the prog and count argument to arch_prepare_bpf_trampoline to use bpf_tramp_progs to simplify the addition and accounting for new attachment types. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-2-kpsingh@chromium.org --- include/linux/bpf.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f13c78c6f29d..98ec10b23dbb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -433,6 +433,16 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) +/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 + * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 + */ +#define BPF_MAX_TRAMP_PROGS 40 + +struct bpf_tramp_progs { + struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; + int nr_progs; +}; + /* Different use cases for BPF trampoline: * 1. replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS @@ -455,8 +465,7 @@ struct btf_func_model { */ int arch_prepare_bpf_trampoline(void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_prog **fentry_progs, int fentry_cnt, - struct bpf_prog **fexit_progs, int fexit_cnt, + struct bpf_tramp_progs *tprogs, void *orig_call); /* these two functions are called from generated trampoline */ u64 notrace __bpf_prog_enter(void); -- cgit v1.2.3 From ae24082331d9bbaae283aafbe930a8f0eb85605a Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:49 +0100 Subject: bpf: Introduce BPF_MODIFY_RETURN When multiple programs are attached, each program receives the return value from the previous program on the stack and the last program provides the return value to the attached function. The fmod_ret bpf programs are run after the fentry programs and before the fexit programs. The original function is only called if all the fmod_ret programs return 0 to avoid any unintended side-effects. The success value, i.e. 0 is not currently configurable but can be made so where user-space can specify it at load time. For example: int func_to_be_attached(int a, int b) { <--- do_fentry do_fmod_ret: if (ret != 0) goto do_fexit; original_function: } <--- do_fexit The fmod_ret program attached to this function can be defined as: SEC("fmod_ret/func_to_be_attached") int BPF_PROG(func_name, int a, int b, int ret) { // This will skip the original function logic. return 1; } The first fmod_ret program is passed 0 in its return argument. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-4-kpsingh@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 98ec10b23dbb..f748b31e5888 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -474,6 +474,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, + BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ }; -- cgit v1.2.3 From da00d2f117a08fbca262db5ea422c80a568b112b Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:52 +0100 Subject: bpf: Add test ops for BPF_PROG_TYPE_TRACING The current fexit and fentry tests rely on a different program to exercise the functions they attach to. Instead of doing this, implement the test operations for tracing which will also be used for BPF_MODIFY_RETURN in a subsequent patch. Also, clean up the fexit test to use the generated skeleton. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-7-kpsingh@chromium.org --- include/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f748b31e5888..40c53924571d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1156,6 +1156,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1313,6 +1316,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) -- cgit v1.2.3 From d6e055e8733da5ce53fc69c77e379915400068c5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 29 Feb 2020 23:23:44 +0100 Subject: PCI: Add constant PCI_STATUS_ERROR_BITS This collection of PCI error bits is used in more than one driver, so move it to the PCI core. Signed-off-by: Heiner Kallweit Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 3840a541a9de..101d71e0ad0d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -42,6 +42,13 @@ #include +#define PCI_STATUS_ERROR_BITS (PCI_STATUS_DETECTED_PARITY | \ + PCI_STATUS_SIG_SYSTEM_ERROR | \ + PCI_STATUS_REC_MASTER_ABORT | \ + PCI_STATUS_REC_TARGET_ABORT | \ + PCI_STATUS_SIG_TARGET_ABORT | \ + PCI_STATUS_PARITY) + /* * The PCI interface treats multi-function devices as independent * devices. The slot/function address of each device is encoded -- cgit v1.2.3 From ec5d9e87842a43be3a10ada0d5f560bbd3f31d5d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 29 Feb 2020 23:24:23 +0100 Subject: PCI: Add pci_status_get_and_clear_errors Several drivers use the following code sequence: 1. Read PCI_STATUS 2. Mask out non-error bits 3. Action based on error bits set 4. Write back set error bits to clear them As this is a repeated pattern, add a helper to the PCI core. Signed-off-by: Heiner Kallweit Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 101d71e0ad0d..7beaf51e98ec 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1210,6 +1210,7 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); void pci_ignore_hotplug(struct pci_dev *dev); struct pci_dev *pci_real_dma_dev(struct pci_dev *dev); +int pci_status_get_and_clear_errors(struct pci_dev *pdev); int __printf(6, 7) pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler, irq_handler_t thread_fn, void *dev_id, -- cgit v1.2.3 From 1326034b3ce7073e3ed74bd0f4d24afee96a9e07 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 19 Feb 2020 21:05:17 +0200 Subject: net/mlx5: Expose raw packet pacing APIs Expose raw packet pacing APIs to be used by DEVX based applications. The existing code was refactored to have a single flow with the new raw APIs. The new raw APIs considered the input of 'pp_rate_limit_context', uid, 'dedicated', upon looking for an existing entry. This raw mode enables future device specification data in the raw context without changing the existing logic and code. The ability to ask for a dedicated entry gives control for application to allocate entries according to its needs. A dedicated entry may not be used by some other process and it also enables the process spreading its resources to some different entries for use different hardware resources as part of enforcing the rate. The counter per entry was changed to be u64 to prevent any option to overflow. Signed-off-by: Yishai Hadas Acked-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 11 ++++++++--- include/linux/mlx5/mlx5_ifc.h | 26 ++++++++++++++++---------- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 277a51d3ec40..f2b4225ed650 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -518,9 +518,11 @@ struct mlx5_rate_limit { }; struct mlx5_rl_entry { - struct mlx5_rate_limit rl; - u16 index; - u16 refcount; + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)]; + u16 index; + u64 refcount; + u16 uid; + u8 dedicated : 1; }; struct mlx5_rl_table { @@ -1007,6 +1009,9 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, struct mlx5_rate_limit *rl); void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl); bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); +int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid, + bool dedicated_entry, u16 *index); +void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index); bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, struct mlx5_rate_limit *rl_1); int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ff8c9d527bb4..7d89ab64b372 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -810,7 +810,9 @@ struct mlx5_ifc_qos_cap_bits { u8 reserved_at_4[0x1]; u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_typical_size[0x1]; - u8 reserved_at_7[0x19]; + u8 reserved_at_7[0x4]; + u8 packet_pacing_uid[0x1]; + u8 reserved_at_c[0x14]; u8 reserved_at_20[0x20]; @@ -8262,9 +8264,20 @@ struct mlx5_ifc_set_pp_rate_limit_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_set_pp_rate_limit_context_bits { + u8 rate_limit[0x20]; + + u8 burst_upper_bound[0x20]; + + u8 reserved_at_40[0x10]; + u8 typical_packet_size[0x10]; + + u8 reserved_at_60[0x120]; +}; + struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -8274,14 +8287,7 @@ struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 reserved_at_60[0x20]; - u8 rate_limit[0x20]; - - u8 burst_upper_bound[0x20]; - - u8 reserved_at_c0[0x10]; - u8 typical_packet_size[0x10]; - - u8 reserved_at_e0[0x120]; + struct mlx5_ifc_set_pp_rate_limit_context_bits ctx; }; struct mlx5_ifc_access_register_out_bits { -- cgit v1.2.3 From 95cddcb5cc202d3f2499596b9af5b77536c5f86a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Mar 2020 21:15:31 -0800 Subject: ethtool: add infrastructure for centralized checking of coalescing parameters Linux supports 22 different interrupt coalescing parameters. No driver implements them all. Some drivers just ignore the ones they don't support, while others have to carry a long list of checks to reject unsupported settings. To simplify the drivers add the ability to specify inside ethtool_ops which parameters are supported and let the core reject attempts to set any other one. This commit makes the mechanism an opt-in, only drivers which set ethtool_opts->coalesce_types to a non-zero value will have the checks enforced. The same mask is used for global and per queue settings. v3: - move the (temporary) check if driver defines types earlier (Michal) - rename used_types -> nonzero_params, and coalesce_types -> supported_coalesce_params (Alex) - use EOPNOTSUPP instead of EINVAL (Andrew, Michal) Leaving the long series of ifs for now, it seems nice to be able to grep for the field and flag names. This will probably have to be revisited once netlink support lands. Signed-off-by: Jakub Kicinski Reviewed-by: Jacob Keller Reviewed-by: Michal Kubecek Reviewed-by: Andrew Lunn Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/ethtool.h | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 23373978cb3c..e464c946bca4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -177,8 +177,44 @@ void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, const unsigned long *src); +#define ETHTOOL_COALESCE_RX_USECS BIT(0) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES BIT(1) +#define ETHTOOL_COALESCE_RX_USECS_IRQ BIT(2) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ BIT(3) +#define ETHTOOL_COALESCE_TX_USECS BIT(4) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES BIT(5) +#define ETHTOOL_COALESCE_TX_USECS_IRQ BIT(6) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ BIT(7) +#define ETHTOOL_COALESCE_STATS_BLOCK_USECS BIT(8) +#define ETHTOOL_COALESCE_USE_ADAPTIVE_RX BIT(9) +#define ETHTOOL_COALESCE_USE_ADAPTIVE_TX BIT(10) +#define ETHTOOL_COALESCE_PKT_RATE_LOW BIT(11) +#define ETHTOOL_COALESCE_RX_USECS_LOW BIT(12) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_LOW BIT(13) +#define ETHTOOL_COALESCE_TX_USECS_LOW BIT(14) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_LOW BIT(15) +#define ETHTOOL_COALESCE_PKT_RATE_HIGH BIT(16) +#define ETHTOOL_COALESCE_RX_USECS_HIGH BIT(17) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_HIGH BIT(18) +#define ETHTOOL_COALESCE_TX_USECS_HIGH BIT(19) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH BIT(20) +#define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL BIT(21) + +#define ETHTOOL_COALESCE_USECS \ + (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) +#define ETHTOOL_COALESCE_MAX_FRAMES \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES | ETHTOOL_COALESCE_TX_MAX_FRAMES) +#define ETHTOOL_COALESCE_USECS_IRQ \ + (ETHTOOL_COALESCE_RX_USECS_IRQ | ETHTOOL_COALESCE_TX_USECS_IRQ) +#define ETHTOOL_COALESCE_MAX_FRAMES_IRQ \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ) +#define ETHTOOL_COALESCE_USE_ADAPTIVE \ + (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_ADAPTIVE_TX) + /** * struct ethtool_ops - optional netdev operations + * @supported_coalesce_params: supported types of interrupt coalescing. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not * implemented, the @driver and @bus_info fields will be filled in @@ -207,8 +243,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * or zero. * @get_coalesce: Get interrupt coalescing parameters. Returns a negative * error code or zero. - * @set_coalesce: Set interrupt coalescing parameters. Returns a negative - * error code or zero. + * @set_coalesce: Set interrupt coalescing parameters. Supported coalescing + * types should be set in @supported_coalesce_params. + * Returns a negative error code or zero. * @get_ringparam: Report ring sizes * @set_ringparam: Set ring sizes. Returns a negative error code or zero. * @get_pauseparam: Report pause parameters @@ -292,7 +329,8 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * @set_per_queue_coalesce: Set interrupt coalescing parameters per queue. * It must check that the given queue number is valid. If neither a RX nor * a TX queue has this number, return -EINVAL. If only a RX queue or a TX - * queue has this number, ignore the inapplicable fields. + * queue has this number, ignore the inapplicable fields. Supported + * coalescing types should be set in @supported_coalesce_params. * Returns a negative error code or zero. * @get_link_ksettings: Get various device settings including Ethernet link * settings. The %cmd and %link_mode_masks_nwords fields should be @@ -323,6 +361,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * of the generic netdev features interface. */ struct ethtool_ops { + u32 supported_coalesce_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); -- cgit v1.2.3 From aaca9408078914380fbfd8aef3c38a34b515a654 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Mar 2020 09:16:40 +0200 Subject: net: sched: Make FIFO Qdisc offloadable Invoke ndo_setup_tc() as appropriate to signal init / replacement, destroying and dumping of pFIFO / bFIFO Qdisc. A lot of the FIFO logic is used for pFIFO_head_drop as well, but that's a semantically very different Qdisc that isn't really in the same boat as pFIFO / bFIFO. Split some of the functions to keep the Qdisc intact. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b6fedd54cd8e..654808bfad83 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -853,6 +853,7 @@ enum tc_setup_type { TC_SETUP_FT, TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, + TC_SETUP_QDISC_FIFO, }; /* These structures hold the attributes of bpf state that are being passed -- cgit v1.2.3 From 70c0923b0ef10b1c8d8f78fb50fcaef8eaae619d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 2 Mar 2020 18:25:00 -0800 Subject: PCI: Introduce pci_get_dsn Several device drivers read their Device Serial Number from the PCIe extended config space. Introduce a new helper function, pci_get_dsn(). This function reads the eight bytes of the DSN and returns them as a u64. If the capability does not exist for the device, the function returns 0. Signed-off-by: Jacob Keller Cc: Bjorn Helgaas Cc: Jeff Kirsher Cc: Michael Chan Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7beaf51e98ec..fc54b8922e66 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1052,6 +1052,8 @@ int pci_find_ht_capability(struct pci_dev *dev, int ht_cap); int pci_find_next_ht_capability(struct pci_dev *dev, int pos, int ht_cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); +u64 pci_get_dsn(struct pci_dev *dev); + struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from); struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, @@ -1707,6 +1709,9 @@ static inline int pci_find_next_capability(struct pci_dev *dev, u8 post, static inline int pci_find_ext_capability(struct pci_dev *dev, int cap) { return 0; } +static inline u64 pci_get_dsn(struct pci_dev *dev) +{ return 0; } + /* Power management related routines */ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } -- cgit v1.2.3 From 86f5d0f3d4995b8b8909da7eea235e5bc1ceefbe Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 2 Mar 2020 16:15:19 -0800 Subject: net/mlx5: Introduce egress acl forward-to-vport capability Add HCA_CAP.egress_acl_forward_to_vport field to check whether HW supports e-switch vport's egress acl to forward packets to other e-switch vport or not. By default E-Switch egress ACL forwards eswitch vports egress packets to their corresponding NIC/VF vports. With this cap enabled, the driver is allowed to alter this behavior and forward packets to arbitrary NIC/VF vports with the following limitations: a. Multiple processing paths are supported if all of the following conditions are met: - HCA_CAP.egress_acl_forward_to_vport is set ==1. - A destination of type Flow Table only appears once, as the last destination in the list. - Vport destination is supported if HCA_CAP.egress_acl_forward_to_vport==1. Vport must not be the Uplink. b. Flow_tag not supported. c. This table is only applicable after an FDB table is created. d. Push VLAN action is not supported. e. Pop VLAN action cannot be added concurrently to this table and FDB table. This feature will be used during port failover in bonding scenario where two VFs representors are bonded to handle failover egress traffic (VM's ingress/receive traffic). Signed-off-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7d89ab64b372..07be46e713fc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -738,7 +738,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 flow_source[0x1]; u8 reserved_at_18[0x2]; u8 multi_fdb_encap[0x1]; - u8 reserved_at_1b[0x1]; + u8 egress_acl_forward_to_vport[0x1]; u8 fdb_multi_path_to_table[0x1]; u8 reserved_at_1d[0x3]; -- cgit v1.2.3 From bd673da6d933e3c8f652b011afba6cee0f8bbe45 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 2 Mar 2020 16:15:20 -0800 Subject: net/mlx5: Introduce TLS and IPSec objects enums Expose the TLS encryption key general object type enum correctly, and add the IPSec encryption key general object type enum. Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 07be46e713fc..2e98bba12356 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10489,7 +10489,8 @@ enum { }; enum { - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK = 0x1, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS = 0x1, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC = 0x2, }; struct mlx5_ifc_tls_static_params_bits { -- cgit v1.2.3 From dc392fc56f39a00a46d6db2d150571ccafe99734 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 2 Mar 2020 16:15:21 -0800 Subject: net/mlx5: Expose link speed directly Expose port rate as part of the port speed register fields. Signed-off-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2e98bba12356..d0a678c82ccd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8423,7 +8423,8 @@ struct mlx5_ifc_ptys_reg_bits { u8 proto_mask[0x3]; u8 an_status[0x4]; - u8 reserved_at_24[0x1c]; + u8 reserved_at_24[0xc]; + u8 data_rate_oper[0x10]; u8 ext_eth_proto_capability[0x20]; -- cgit v1.2.3 From e0ebd8eb36ed850a22a9a0ca83edc4a40ad67c16 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 2 Mar 2020 16:15:22 -0800 Subject: net/mlx5: HW bit for goto chain offload support Add the HW bit definition indecating goto chain offload support. Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d0a678c82ccd..9b8ff4e57002 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -414,7 +414,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_16[0x1]; u8 table_miss_action_domain[0x1]; u8 termination_table[0x1]; - u8 reserved_at_19[0x7]; + u8 reformat_and_fwd_to_table[0x1]; + u8 reserved_at_1a[0x6]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; -- cgit v1.2.3 From d7f5f3c89c1a2344e88842ba0de327cc0098583e Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 5 Mar 2020 22:28:15 -0600 Subject: remoteproc: add IPA notification to q6v5 driver Set up a subdev in the q6v5 modem remoteproc driver that generates event notifications for the IPA driver to use for initialization and recovery following a modem shutdown or crash. A pair of new functions provides a way for the IPA driver to register and deregister a notification callback function that will be called whenever modem events (about to boot, running, about to shut down, etc.) occur. A void pointer value (provided by the IPA driver at registration time) and an event type are supplied to the callback function. One event, MODEM_REMOVING, is signaled whenever the q6v5 driver is about to remove the notification subdevice. It requires the IPA driver de-register its callback. This sub-device is only used by the modem subsystem (MSS) driver, so the code that adds the new subdev and allows registration and deregistration of the notifier is found in "qcom_q6v6_mss.c". Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- include/linux/remoteproc/qcom_q6v5_ipa_notify.h | 82 +++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 include/linux/remoteproc/qcom_q6v5_ipa_notify.h (limited to 'include/linux') diff --git a/include/linux/remoteproc/qcom_q6v5_ipa_notify.h b/include/linux/remoteproc/qcom_q6v5_ipa_notify.h new file mode 100644 index 000000000000..0820edc0ab7d --- /dev/null +++ b/include/linux/remoteproc/qcom_q6v5_ipa_notify.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Copyright (C) 2019 Linaro Ltd. */ + +#ifndef __QCOM_Q6V5_IPA_NOTIFY_H__ +#define __QCOM_Q6V5_IPA_NOTIFY_H__ + +#if IS_ENABLED(CONFIG_QCOM_Q6V5_IPA_NOTIFY) + +#include + +enum qcom_rproc_event { + MODEM_STARTING = 0, /* Modem is about to be started */ + MODEM_RUNNING = 1, /* Startup complete; modem is operational */ + MODEM_STOPPING = 2, /* Modem is about to shut down */ + MODEM_CRASHED = 3, /* Modem has crashed (implies stopping) */ + MODEM_OFFLINE = 4, /* Modem is now offline */ + MODEM_REMOVING = 5, /* Modem is about to be removed */ +}; + +typedef void (*qcom_ipa_notify_t)(void *data, enum qcom_rproc_event event); + +struct qcom_rproc_ipa_notify { + struct rproc_subdev subdev; + + qcom_ipa_notify_t notify; + void *data; +}; + +/** + * qcom_add_ipa_notify_subdev() - Register IPA notification subdevice + * @rproc: rproc handle + * @ipa_notify: IPA notification subdevice handle + * + * Register the @ipa_notify subdevice with the @rproc so modem events + * can be sent to IPA when they occur. + * + * This is defined in "qcom_q6v5_ipa_notify.c". + */ +void qcom_add_ipa_notify_subdev(struct rproc *rproc, + struct qcom_rproc_ipa_notify *ipa_notify); + +/** + * qcom_remove_ipa_notify_subdev() - Remove IPA SSR subdevice + * @rproc: rproc handle + * @ipa_notify: IPA notification subdevice handle + * + * This is defined in "qcom_q6v5_ipa_notify.c". + */ +void qcom_remove_ipa_notify_subdev(struct rproc *rproc, + struct qcom_rproc_ipa_notify *ipa_notify); + +/** + * qcom_register_ipa_notify() - Register IPA notification function + * @rproc: Remote processor handle + * @notify: Non-null IPA notification callback function pointer + * @data: Data supplied to IPA notification callback function + * + * @Return: 0 if successful, or a negative error code otherwise + * + * This is defined in "qcom_q6v5_mss.c". + */ +int qcom_register_ipa_notify(struct rproc *rproc, qcom_ipa_notify_t notify, + void *data); +/** + * qcom_deregister_ipa_notify() - Deregister IPA notification function + * @rproc: Remote processor handle + * + * This is defined in "qcom_q6v5_mss.c". + */ +void qcom_deregister_ipa_notify(struct rproc *rproc); + +#else /* !IS_ENABLED(CONFIG_QCOM_Q6V5_IPA_NOTIFY) */ + +struct qcom_rproc_ipa_notify { /* empty */ }; + +#define qcom_add_ipa_notify_subdev(rproc, ipa_notify) /* no-op */ +#define qcom_remove_ipa_notify_subdev(rproc, ipa_notify) /* no-op */ + +#endif /* !IS_ENABLED(CONFIG_QCOM_Q6V5_IPA_NOTIFY) */ + +#endif /* !__QCOM_Q6V5_IPA_NOTIFY_H__ */ -- cgit v1.2.3 From 7b70973d7edb2f005511102d5a2e0116464a46a1 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:32 +0000 Subject: bpf: sockmap: Only check ULP for TCP sockets The sock map code checks that a socket does not have an active upper layer protocol before inserting it into the map. This requires casting via inet_csk, which isn't valid for UDP sockets. Guard checks for ULP by checking inet_sk(sk)->is_icsk first. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-2-lmb@cloudflare.com --- include/linux/skmsg.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 112765bd146d..4d3d75d63066 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -360,7 +360,13 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + if (inet_csk_has_ulp(sk)) { + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + } else { + sk->sk_write_space = psock->saved_write_space; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + } } static inline void sk_psock_set_state(struct sk_psock *psock, -- cgit v1.2.3 From 1a2e20132db7bb76dd4f97b8364bd167227dd15f Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:33 +0000 Subject: skmsg: Update saved hooks only once Only update psock->saved_* if psock->sk_proto has not been initialized yet. This allows us to get rid of tcp_bpf_reinit_sk_prot. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-3-lmb@cloudflare.com --- include/linux/skmsg.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 4d3d75d63066..2be51b7a5800 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -347,11 +347,23 @@ static inline void sk_psock_update_proto(struct sock *sk, struct sk_psock *psock, struct proto *ops) { - psock->saved_unhash = sk->sk_prot->unhash; - psock->saved_close = sk->sk_prot->close; - psock->saved_write_space = sk->sk_write_space; + /* Initialize saved callbacks and original proto only once, since this + * function may be called multiple times for a psock, e.g. when + * psock->progs.msg_parser is updated. + * + * Since we've not installed the new proto, psock is not yet in use and + * we can initialize it without synchronization. + */ + if (!psock->sk_proto) { + struct proto *orig = READ_ONCE(sk->sk_prot); + + psock->saved_unhash = orig->unhash; + psock->saved_close = orig->close; + psock->saved_write_space = sk->sk_write_space; + + psock->sk_proto = orig; + } - psock->sk_proto = sk->sk_prot; /* Pairs with lockless read in sk_clone_lock() */ WRITE_ONCE(sk->sk_prot, ops); } -- cgit v1.2.3 From f747632b608f90217a4e9ebb1deba8a37612aa32 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:36 +0000 Subject: bpf: sockmap: Move generic sockmap hooks from BPF TCP The init, close and unhash handlers from TCP sockmap are generic, and can be reused by UDP sockmap. Move the helpers into the sockmap code base and expose them. This requires tcp_bpf_get_proto and tcp_bpf_clone to be conditional on BPF_STREAM_PARSER. The moved functions are unmodified, except that sk_psock_unlink is renamed to sock_map_unlink to better match its behaviour. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-6-lmb@cloudflare.com --- include/linux/bpf.h | 4 +++- include/linux/skmsg.h | 28 ---------------------------- 2 files changed, 3 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 40c53924571d..94a329b9da81 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1419,6 +1419,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #if defined(CONFIG_BPF_STREAM_PARSER) int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +void sock_map_unhash(struct sock *sk); +void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which) @@ -1431,7 +1433,7 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } -#endif +#endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 2be51b7a5800..8a709f63c5e5 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -323,14 +323,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link) } struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); -#if defined(CONFIG_BPF_STREAM_PARSER) -void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link); -#else -static inline void sk_psock_unlink(struct sock *sk, - struct sk_psock_link *link) -{ -} -#endif void __sk_psock_purge_ingress_msg(struct sk_psock *psock); @@ -399,26 +391,6 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, return test_bit(bit, &psock->state); } -static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) -{ - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (psock) { - if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { - psock = ERR_PTR(-EBUSY); - goto out; - } - - if (!refcount_inc_not_zero(&psock->refcnt)) - psock = ERR_PTR(-EBUSY); - } -out: - rcu_read_unlock(); - return psock; -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; -- cgit v1.2.3 From fcb26bd2b6cab573f06e5855638368cf88e99c2b Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 9 Mar 2020 09:36:26 +0100 Subject: net: phy: Add Synopsys DesignWare XPCS MDIO module Synopsys DesignWare XPCS is an MMD that can manage link status, auto-negotiation, link training, ... In this commit we add basic support for XPCS using USXGMII interface and Clause 73 Auto-negotiation. This is highly tied with PHYLINK and can't be used without it. A given ethernet driver can use the provided callbacks to add the support for XPCS. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- include/linux/mdio-xpcs.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/mdio-xpcs.h (limited to 'include/linux') diff --git a/include/linux/mdio-xpcs.h b/include/linux/mdio-xpcs.h new file mode 100644 index 000000000000..9a841aa5982d --- /dev/null +++ b/include/linux/mdio-xpcs.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare XPCS helpers + */ + +#ifndef __LINUX_MDIO_XPCS_H +#define __LINUX_MDIO_XPCS_H + +#include +#include + +struct mdio_xpcs_args { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + struct mii_bus *bus; + int addr; +}; + +struct mdio_xpcs_ops { + int (*validate)(struct mdio_xpcs_args *xpcs, + unsigned long *supported, + struct phylink_link_state *state); + int (*config)(struct mdio_xpcs_args *xpcs, + const struct phylink_link_state *state); + int (*get_state)(struct mdio_xpcs_args *xpcs, + struct phylink_link_state *state); + int (*link_up)(struct mdio_xpcs_args *xpcs, int speed, + phy_interface_t interface); + int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface); +}; + +#if IS_ENABLED(CONFIG_MDIO_XPCS) +struct mdio_xpcs_ops *mdio_xpcs_get_ops(void); +#else +static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void) +{ + return NULL; +} +#endif + +#endif /* __LINUX_MDIO_XPCS_H */ -- cgit v1.2.3 From f213bbe8a9d6ba1d0adf424787c02f361ea78c38 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 9 Mar 2020 09:36:27 +0100 Subject: net: stmmac: Integrate it with DesignWare XPCS Adds all the necessary logic so that stmmac can be used with Synopsys DesignWare XPCS. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 19190c609282..fbafb353e9be 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -80,6 +80,7 @@ struct stmmac_mdio_bus_data { unsigned int phy_mask; + unsigned int has_xpcs; int *irqs; int probed_phy_irq; bool needs_reset; -- cgit v1.2.3 From 812df69beb86b0e6decbb109ee3fa408dcb7fa5d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Mar 2020 19:15:06 -0700 Subject: net: liquidio: reject unsupported coalescing params Set ethtool_ops->supported_coalesce_params to let the core reject unsupported coalescing parameters. This driver did not previously reject unsupported parameters. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e464c946bca4..9efeebde3514 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -211,6 +211,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ) #define ETHTOOL_COALESCE_USE_ADAPTIVE \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_ADAPTIVE_TX) +#define ETHTOOL_COALESCE_PKT_RATE_RX_USECS \ + (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | \ + ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \ + ETHTOOL_COALESCE_PKT_RATE_LOW | ETHTOOL_COALESCE_PKT_RATE_HIGH | \ + ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL) /** * struct ethtool_ops - optional netdev operations -- cgit v1.2.3 From babf3164095b0670435910340c2a1eec37757b57 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Mar 2020 16:10:51 -0700 Subject: bpf: Add bpf_link_new_file that doesn't install FD Add bpf_link_new_file() API for cases when we need to ensure anon_inode is successfully created before we proceed with expensive BPF program attachment procedure, which will require equally (if not more so) expensive and potentially failing compensation detachment procedure just because anon_inode creation failed. This API allows to simplify code by ensuring first that anon_inode is created and after BPF program is attached proceed with fd_install() that can't fail. After anon_inode file is created, link can't be just kfree()'d anymore, because its destruction will be performed by deferred file_operations->release call. For this, bpf_link API required specifying two separate operations: release() and dealloc(), former performing detachment only, while the latter frees memory used by bpf_link itself. dealloc() needs to be specified, because struct bpf_link is frequently embedded into link type-specific container struct (e.g., struct bpf_raw_tp_link), so bpf_link itself doesn't know how to properly free the memory. In case when anon_inode file was successfully created, but subsequent BPF attachment failed, bpf_link needs to be marked as "defunct", so that file's release() callback will perform only memory deallocation, but no detachment. Convert raw tracepoint and tracing attachment to new API and eliminate detachment from error handling path. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309231051.1270337-1-andriin@fb.com --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 94a329b9da81..4fd91b7c95ea 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1070,13 +1070,16 @@ struct bpf_link; struct bpf_link_ops { void (*release)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); }; void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, struct bpf_prog *prog); +void bpf_link_defunct(struct bpf_link *link); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); +struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); -- cgit v1.2.3 From 0fa81b304a7973a499f844176ca031109487dd31 Mon Sep 17 00:00:00 2001 From: Alexander Bersenev Date: Fri, 6 Mar 2020 01:33:16 +0500 Subject: cdc_ncm: Implement the 32-bit version of NCM Transfer Block The NCM specification defines two formats of transfer blocks: with 16-bit fields (NTB-16) and with 32-bit fields (NTB-32). Currently only NTB-16 is implemented. This patch adds the support of NTB-32. The motivation behind this is that some devices such as E5785 or E5885 from the current generation of Huawei LTE routers do not support NTB-16. The previous generations of Huawei devices are also use NTB-32 by default. Also this patch enables NTB-32 by default for Huawei devices. During the 2019 ValdikSS made five attempts to contact Huawei to add the NTB-16 support to their router firmware, but they were unsuccessful. Signed-off-by: Alexander Bersenev Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 1646c06989df..0ce4377545f8 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -46,9 +46,12 @@ #define CDC_NCM_DATA_ALTSETTING_NCM 1 #define CDC_NCM_DATA_ALTSETTING_MBIM 2 -/* CDC NCM subclass 3.2.1 */ +/* CDC NCM subclass 3.3.1 */ #define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 +/* CDC NCM subclass 3.3.2 */ +#define USB_CDC_NCM_NDP32_LENGTH_MIN 0x20 + /* Maximum NTB length */ #define CDC_NCM_NTB_MAX_SIZE_TX 32768 /* bytes */ #define CDC_NCM_NTB_MAX_SIZE_RX 32768 /* bytes */ @@ -84,7 +87,7 @@ /* Driver flags */ #define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ #define CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE 0x04 /* Avoid altsetting toggle during init */ -#define CDC_NCM_FLAG_RESET_NTB16 0x08 /* set NDP16 one more time after altsetting switch */ +#define CDC_NCM_FLAG_PREFER_NTB32 0x08 /* prefer NDP32 over NDP16 */ #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) @@ -113,7 +116,11 @@ struct cdc_ncm_ctx { u32 timer_interval; u32 max_ndp_size; - struct usb_cdc_ncm_ndp16 *delayed_ndp16; + u8 is_ndp16; + union { + struct usb_cdc_ncm_ndp16 *delayed_ndp16; + struct usb_cdc_ncm_ndp32 *delayed_ndp32; + }; u32 tx_timer_pending; u32 tx_curr_frame_num; @@ -150,6 +157,8 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); +int cdc_ncm_rx_verify_nth32(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); +int cdc_ncm_rx_verify_ndp32(struct sk_buff *skb_in, int ndpoffset); struct sk_buff * cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in); -- cgit v1.2.3 From 8213f6c9a275da084dc9363f36f93138547f46f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Mar 2020 15:32:48 -0700 Subject: net: be2net: reject unsupported coalescing params Set ethtool_ops->supported_coalesce_params to let the core reject unsupported coalescing parameters. This driver did not previously reject unsupported parameters. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9efeebde3514..acfce915a02b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -211,6 +211,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ) #define ETHTOOL_COALESCE_USE_ADAPTIVE \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_ADAPTIVE_TX) +#define ETHTOOL_COALESCE_USECS_LOW_HIGH \ + (ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_TX_USECS_LOW | \ + ETHTOOL_COALESCE_RX_USECS_HIGH | ETHTOOL_COALESCE_TX_USECS_HIGH) #define ETHTOOL_COALESCE_PKT_RATE_RX_USECS \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | \ ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \ -- cgit v1.2.3 From 4f9546d24a12e9041eae20574ab244f5410e02c4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Mar 2020 15:32:52 -0700 Subject: net: hns: reject unsupported coalescing params Set ethtool_ops->supported_coalesce_params to let the core reject unsupported coalescing parameters. This driver did not previously reject unsupported parameters. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index acfce915a02b..be355f37337d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -214,6 +214,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_USECS_LOW_HIGH \ (ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_TX_USECS_LOW | \ ETHTOOL_COALESCE_RX_USECS_HIGH | ETHTOOL_COALESCE_TX_USECS_HIGH) +#define ETHTOOL_COALESCE_MAX_FRAMES_LOW_HIGH \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES_LOW | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_LOW | \ + ETHTOOL_COALESCE_RX_MAX_FRAMES_HIGH | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH) #define ETHTOOL_COALESCE_PKT_RATE_RX_USECS \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | \ ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \ -- cgit v1.2.3 From 5b7cb7451585f83d414512a70b79b2086b8c6ed1 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 12 Mar 2020 12:23:03 +0200 Subject: net/mlx5: E-Switch, Enable reg c1 loopback when possible Enable reg c1 loopback if firmware reports it's supported, as this is needed for restoring packet metadata (e.g chain). Also define helper to query if it is enabled. Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- include/linux/mlx5/eswitch.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 61705e74a5bb..c16827eeba9c 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -70,6 +70,7 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); +bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); /* Reg C0 usage: @@ -108,6 +109,12 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) return DEVLINK_ESWITCH_ENCAP_MODE_NONE; } +static inline bool +mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) +{ + return false; +}; + static inline bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) { -- cgit v1.2.3 From e31a50162feb352147d3fc87b9e036703c8f2636 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 12 Mar 2020 11:44:27 -0500 Subject: bitfield.h: add FIELD_MAX() and field_max() Define FIELD_MAX(), which supplies the maximum value that can be represented by a field value. Define field_max() as well, to go along with the lower-case forms of the field mask functions. Signed-off-by: Alex Elder Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/bitfield.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4bbb5f1c8b5b..48ea093ff04c 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -55,6 +55,19 @@ (1ULL << __bf_shf(_mask))); \ }) +/** + * FIELD_MAX() - produce the maximum value representable by a field + * @_mask: shifted mask defining the field's length and position + * + * FIELD_MAX() returns the maximum value that can be held in the field + * specified by @_mask. + */ +#define FIELD_MAX(_mask) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ + (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ + }) + /** * FIELD_FIT() - check if value fits in the field * @_mask: shifted mask defining the field's length and position @@ -110,6 +123,7 @@ static __always_inline u64 field_mask(u64 field) { return field / field_multiplier(field); } +#define field_max(field) ((typeof(field))field_mask(field)) #define ____MAKE_OP(type,base,to,from) \ static __always_inline __##type type##_encode_bits(base v, base field) \ { \ -- cgit v1.2.3 From 1e2328e762548c7d17b7ba8ded9f409d05710dd1 Mon Sep 17 00:00:00 2001 From: Carlos Neira Date: Wed, 4 Mar 2020 17:41:55 -0300 Subject: fs/nsfs.c: Added ns_match ns_match returns true if the namespace inode and dev_t matches the ones provided by the caller. Signed-off-by: Carlos Neira Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200304204157.58695-2-cneirabustos@gmail.com --- include/linux/proc_ns.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4626b1ac3b6c..adff08bfecf9 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -85,6 +85,8 @@ typedef struct ns_common *ns_get_path_helper_t(void *); extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, void *private_data); +extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); + extern int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops); extern void nsfs_init(void); -- cgit v1.2.3 From b4490c5c4e023f09b7d27c9a9d3e7ad7d09ea6bf Mon Sep 17 00:00:00 2001 From: Carlos Neira Date: Wed, 4 Mar 2020 17:41:56 -0300 Subject: bpf: Added new helper bpf_get_ns_current_pid_tgid New bpf helper bpf_get_ns_current_pid_tgid, This helper will return pid and tgid from current task which namespace matches dev_t and inode number provided, this will allows us to instrument a process inside a container. Signed-off-by: Carlos Neira Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200304204157.58695-3-cneirabustos@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fd91b7c95ea..4ec835334a1f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1497,6 +1497,7 @@ extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); -- cgit v1.2.3 From fc6a9f86f08acd3665f788619afae0d2b2d5a480 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 10 Mar 2020 10:22:28 +0200 Subject: {IB,net}/mlx5: Assign mkey variant in mlx5_ib only mkey variant is not required for mlx5_core use, move the mkey variant counter to mlx5_ib. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f2b4225ed650..e044703c056b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -575,10 +575,6 @@ struct mlx5_priv { /* end: alloc staff */ struct dentry *dbg_root; - /* protect mkey key part */ - spinlock_t mkey_lock; - u8 mkey_key; - struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; -- cgit v1.2.3 From a3cfdd3928113012d0f2c5353277f4e27878a663 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 10 Mar 2020 10:22:30 +0200 Subject: {IB,net}/mlx5: Move asynchronous mkey creation to mlx5_ib As mlx5_ib is the only user of the mlx5_core_create_mkey_cb, move the logic inside mlx5_ib and cleanup the code in mlx5_core. Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e044703c056b..1de78f001d26 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -943,12 +943,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); -- cgit v1.2.3 From 98868668367b24487c0b0b3298d7ca98409baf07 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 12 Mar 2020 17:21:28 -0700 Subject: bpf: Abstract away entire bpf_link clean up procedure Instead of requiring users to do three steps for cleaning up bpf_link, its anon_inode file, and unused fd, abstract that away into bpf_link_cleanup() helper. bpf_link_defunct() is removed, as it shouldn't be needed as an individual operation anymore. v1->v2: - keep bpf_link_cleanup() static for now (Daniel). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200313002128.2028680-1-andriin@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4ec835334a1f..c2f815e9f7d0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1075,7 +1075,6 @@ struct bpf_link_ops { void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, struct bpf_prog *prog); -void bpf_link_defunct(struct bpf_link *link); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -- cgit v1.2.3 From 6a64037d4bf252bb8cf13917320c8e001da8997a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 12 Mar 2020 20:55:57 +0100 Subject: bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER, so all the dispatchers have the common name prefix. And also a small '_' cleanup for bpf_dispatcher_nopfunc function name. Signed-off-by: Björn Töpel Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 21 +++++++++++---------- include/linux/filter.h | 9 ++++----- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c2f815e9f7d0..fe1f8b075378 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -522,7 +522,7 @@ struct bpf_dispatcher { u32 image_off; }; -static __always_inline unsigned int bpf_dispatcher_nopfunc( +static __always_inline unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -537,7 +537,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); #define BPF_DISPATCHER_INIT(name) { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .func = &name##func, \ + .func = &name##_func, \ .progs = {}, \ .num_progs = 0, \ .image = NULL, \ @@ -545,7 +545,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); } #define DEFINE_BPF_DISPATCHER(name) \ - noinline unsigned int name##func( \ + noinline unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ @@ -553,17 +553,18 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); { \ return bpf_func(ctx, insnsi); \ } \ - EXPORT_SYMBOL(name##func); \ - struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name); + EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ + struct bpf_dispatcher bpf_dispatcher_##name = \ + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ - unsigned int name##func( \ + unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ const struct bpf_insn *)); \ - extern struct bpf_dispatcher name; -#define BPF_DISPATCHER_FUNC(name) name##func -#define BPF_DISPATCHER_PTR(name) (&name) + extern struct bpf_dispatcher bpf_dispatcher_##name; +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func +#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); struct bpf_image { @@ -589,7 +590,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) #define DECLARE_BPF_DISPATCHER(name) -#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func #define BPF_DISPATCHER_PTR(name) NULL static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/include/linux/filter.h b/include/linux/filter.h index 43b5e455d2f5..6249679275b3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -577,7 +577,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); ret; }) #define BPF_PROG_RUN(prog, ctx) \ - __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc) + __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) /* * Use in preemptible and therefore migratable context to make sure that @@ -596,7 +596,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, u32 ret; migrate_disable(); - ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc); + ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func); migrate_enable(); return ret; } @@ -722,7 +722,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return res; } -DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp) +DECLARE_BPF_DISPATCHER(xdp) static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) @@ -733,8 +733,7 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. */ - return __BPF_PROG_RUN(prog, xdp, - BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp)); + return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); -- cgit v1.2.3 From 535911c80ad4f5801700e9d827a1985bbff41519 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:55:58 +0100 Subject: bpf: Add struct bpf_ksym Adding 'struct bpf_ksym' object that will carry the kallsym information for bpf symbol. Adding the start and end address to begin with. It will be used by bpf_prog, bpf_trampoline, bpf_dispatcher objects. The symbol_start/symbol_end values were originally used to sort bpf_prog objects. For the address displayed in /proc/kallsyms we are using prog->bpf_func value. I'm using the bpf_func value for program symbol start instead of the symbol_start, because it makes no difference for sorting bpf_prog objects and we can use it directly as an address to display it in /proc/kallsyms. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fe1f8b075378..6ca3d5c8ccf3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -471,6 +471,11 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, u64 notrace __bpf_prog_enter(void); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); +struct bpf_ksym { + unsigned long start; + unsigned long end; +}; + enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, @@ -653,6 +658,7 @@ struct bpf_prog_aux { u32 size_poke_tab; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; + struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; -- cgit v1.2.3 From bfea9a8574f34597581f74f792d044d38497b775 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:55:59 +0100 Subject: bpf: Add name to struct bpf_ksym Adding name to 'struct bpf_ksym' object to carry the name of the symbol for bpf_prog, bpf_trampoline, bpf_dispatcher objects. The current benefit is that name is now generated only when the symbol is added to the list, so we don't need to generate it every time it's accessed. The future benefit is that we will have all the bpf objects symbols represented by struct bpf_ksym. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-5-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/linux/filter.h | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6ca3d5c8ccf3..047b44deb3c5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -18,6 +18,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -474,6 +475,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); struct bpf_ksym { unsigned long start; unsigned long end; + char name[KSYM_NAME_LEN]; }; enum bpf_tramp_prog_type { diff --git a/include/linux/filter.h b/include/linux/filter.h index 6249679275b3..9b5aa5c483cc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1083,7 +1083,6 @@ bpf_address_lookup(unsigned long addr, unsigned long *size, void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); -void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); #else /* CONFIG_BPF_JIT */ @@ -1152,11 +1151,6 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } -static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) -{ - sym[0] = '\0'; -} - #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); -- cgit v1.2.3 From ecb60d1c670e9b205197d8e4381b19e77bc2d834 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:00 +0100 Subject: bpf: Move lnode list node to struct bpf_ksym Adding lnode list node to 'struct bpf_ksym' object, so the struct bpf_ksym itself can be chained and used in other objects like bpf_trampoline and bpf_dispatcher. Changing iterator to bpf_ksym in bpf_get_kallsym function. The ksym->start is holding the prog->bpf_func value, so it's ok to use it as value in bpf_get_kallsym. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-6-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 047b44deb3c5..4fad2fa4135c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -476,6 +476,7 @@ struct bpf_ksym { unsigned long start; unsigned long end; char name[KSYM_NAME_LEN]; + struct list_head lnode; }; enum bpf_tramp_prog_type { @@ -659,7 +660,6 @@ struct bpf_prog_aux { struct bpf_jit_poke_descriptor *poke_tab; u32 size_poke_tab; struct latch_tree_node ksym_tnode; - struct list_head ksym_lnode; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; -- cgit v1.2.3 From ca4424c920f574b7246ff1b6d83cfdfd709e42c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:01 +0100 Subject: bpf: Move ksym_tnode to bpf_ksym Moving ksym_tnode list node to 'struct bpf_ksym' object, so the symbol itself can be chained and used in other objects like bpf_trampoline and bpf_dispatcher. We need bpf_ksym object to be linked both in bpf_kallsyms via lnode for /proc/kallsyms and in bpf_tree via tnode for bpf address lookup functions like __bpf_address_lookup or bpf_prog_kallsyms_find. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-7-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fad2fa4135c..68d66b0078df 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -477,6 +477,7 @@ struct bpf_ksym { unsigned long end; char name[KSYM_NAME_LEN]; struct list_head lnode; + struct latch_tree_node tnode; }; enum bpf_tramp_prog_type { @@ -659,7 +660,6 @@ struct bpf_prog_aux { void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; u32 size_poke_tab; - struct latch_tree_node ksym_tnode; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; -- cgit v1.2.3 From cbd76f8d5ac9c4e99c4ffe5e39a1e907cdf5a76f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:03 +0100 Subject: bpf: Add prog flag to struct bpf_ksym object Adding 'prog' bool flag to 'struct bpf_ksym' to mark that this object belongs to bpf_prog object. This change allows having bpf_prog objects together with other types (trampolines and dispatchers) in the single bpf_tree. It's used when searching for bpf_prog exception tables by the bpf_prog_ksym_find function, where we need to get the bpf_prog pointer. >From now we can safely add bpf_ksym support for trampoline or dispatcher objects, because we can differentiate them from bpf_prog objects. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-9-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 68d66b0078df..a0cef664c1a9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -478,6 +478,7 @@ struct bpf_ksym { char name[KSYM_NAME_LEN]; struct list_head lnode; struct latch_tree_node tnode; + bool prog; }; enum bpf_tramp_prog_type { -- cgit v1.2.3 From dba122fb5e122e8e07e2f11cdebc10ba4f425cf7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:04 +0100 Subject: bpf: Add bpf_ksym_add/del functions Separating /proc/kallsyms add/del code and adding bpf_ksym_add/del functions for that. Moving bpf_prog_ksym_node_add/del functions to __bpf_ksym_add/del and changing their argument to 'struct bpf_ksym' object. This way we can call them for other bpf objects types like trampoline and dispatcher. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-10-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a0cef664c1a9..ec1de88b8487 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -584,6 +584,9 @@ struct bpf_image { #define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) bool is_bpf_image_address(unsigned long address); void *bpf_image_alloc(void); +/* Called only from JIT-enabled code, so there's no need for stubs. */ +void bpf_ksym_add(struct bpf_ksym *ksym); +void bpf_ksym_del(struct bpf_ksym *ksym); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { -- cgit v1.2.3 From a108f7dcfa010e3da825af90d77ac0a6a0240992 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:05 +0100 Subject: bpf: Add trampolines to kallsyms Adding trampolines to kallsyms. It's displayed as bpf_trampoline_ [bpf] where ID is the BTF id of the trampoline function. Adding bpf_image_ksym_add/del functions that setup the start/end values and call KSYMBOL perf events handlers. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-11-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ec1de88b8487..083860be1944 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -513,6 +513,7 @@ struct bpf_trampoline { /* Executable image of trampoline */ void *image; u64 selector; + struct bpf_ksym ksym; }; #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */ @@ -585,6 +586,8 @@ struct bpf_image { bool is_bpf_image_address(unsigned long address); void *bpf_image_alloc(void); /* Called only from JIT-enabled code, so there's no need for stubs. */ +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); #else -- cgit v1.2.3 From 517b75e44c7be9c776aa5f7beaa85baff3868f80 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:06 +0100 Subject: bpf: Add dispatchers to kallsyms Adding dispatchers to kallsyms. It's displayed as bpf_dispatcher_ where NAME is the name of dispatcher. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-12-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 083860be1944..86cacb54ba23 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -531,6 +531,7 @@ struct bpf_dispatcher { int num_progs; void *image; u32 image_off; + struct bpf_ksym ksym; }; static __always_inline unsigned int bpf_dispatcher_nop_func( @@ -546,13 +547,17 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); -#define BPF_DISPATCHER_INIT(name) { \ - .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .func = &name##_func, \ - .progs = {}, \ - .num_progs = 0, \ - .image = NULL, \ - .image_off = 0 \ +#define BPF_DISPATCHER_INIT(_name) { \ + .mutex = __MUTEX_INITIALIZER(_name.mutex), \ + .func = &_name##_func, \ + .progs = {}, \ + .num_progs = 0, \ + .image = NULL, \ + .image_off = 0, \ + .ksym = { \ + .name = #_name, \ + .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ + }, \ } #define DEFINE_BPF_DISPATCHER(name) \ -- cgit v1.2.3 From 7ac88eba185b4d0e06a71678e54bc092edcd3af3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:07 +0100 Subject: bpf: Remove bpf_image tree Now that we have all the objects (bpf_prog, bpf_trampoline, bpf_dispatcher) linked in bpf_tree, there's no need to have separate bpf_image tree for images. Reverting the bpf_image tree together with struct bpf_image, because it's no longer needed. Also removing bpf_image_alloc function and adding the original bpf_jit_alloc_exec_page interface instead. The kernel_text_address function can now rely only on is_bpf_text_address, because it checks the bpf_tree that contains all the objects. Keeping bpf_image_ksym_add and bpf_image_ksym_del because they are useful wrappers with perf's ksymbol interface calls. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-13-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 86cacb54ba23..bdb981c204fa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -583,14 +583,8 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); -struct bpf_image { - struct latch_tree_node tnode; - unsigned char data[]; -}; -#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) -bool is_bpf_image_address(unsigned long address); -void *bpf_image_alloc(void); /* Called only from JIT-enabled code, so there's no need for stubs. */ +void *bpf_jit_alloc_exec_page(void); void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); -- cgit v1.2.3 From 58b05e58d155fd5a9a181d51b4c9c8a69a0816d3 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Thu, 12 Mar 2020 18:10:09 +0100 Subject: net: phy: Add XLGMII interface define Add a define for XLGMII interface. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7a08023bdbc5..6b872aed8ba6 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -94,6 +94,7 @@ typedef enum { PHY_INTERFACE_MODE_RTBI, PHY_INTERFACE_MODE_SMII, PHY_INTERFACE_MODE_XGMII, + PHY_INTERFACE_MODE_XLGMII, PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, @@ -165,6 +166,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "smii"; case PHY_INTERFACE_MODE_XGMII: return "xgmii"; + case PHY_INTERFACE_MODE_XLGMII: + return "xlgmii"; case PHY_INTERFACE_MODE_MOCA: return "moca"; case PHY_INTERFACE_MODE_QSGMII: -- cgit v1.2.3 From 6daf14140129d30207ed6a0a69851fa6a3636bda Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 20 Feb 2020 07:59:14 -0600 Subject: netfilter: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] Lastly, fix checkpatch.pl warning WARNING: __aligned(size) is preferred over __attribute__((aligned(size))) in net/bridge/netfilter/ebtables.c This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 +- include/linux/netfilter/x_tables.h | 8 ++++---- include/linux/netfilter_arp/arp_tables.h | 2 +- include/linux/netfilter_bridge/ebtables.h | 2 +- include/linux/netfilter_ipv4/ip_tables.h | 2 +- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5448c8b443db..ab192720e2d6 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -98,7 +98,7 @@ struct ip_set_counter { struct ip_set_comment_rcu { struct rcu_head rcu; - char str[0]; + char str[]; }; struct ip_set_comment { diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1b261c51b3a3..5da88451853b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -264,7 +264,7 @@ struct xt_table_info { unsigned int stacksize; void ***jumpstack; - unsigned char entries[0] __aligned(8); + unsigned char entries[] __aligned(8); }; int xt_register_target(struct xt_target *target); @@ -464,7 +464,7 @@ struct compat_xt_entry_match { } kernel; u_int16_t match_size; } u; - unsigned char data[0]; + unsigned char data[]; }; struct compat_xt_entry_target { @@ -480,7 +480,7 @@ struct compat_xt_entry_target { } kernel; u_int16_t target_size; } u; - unsigned char data[0]; + unsigned char data[]; }; /* FIXME: this works only on 32 bit tasks @@ -494,7 +494,7 @@ struct compat_xt_counters { struct compat_xt_counters_info { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t num_counters; - struct compat_xt_counters counters[0]; + struct compat_xt_counters counters[]; }; struct _compat_xt_align { diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index e98028f00e47..7d3537c40ec9 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -67,7 +67,7 @@ struct compat_arpt_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; static inline struct xt_entry_target * diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 162f59d0d17a..2f5c4e6ecd8a 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -85,7 +85,7 @@ struct ebt_table_info { /* room to maintain the stack used for jumping from and into udc */ struct ebt_chainstack **chainstack; char *entries; - struct ebt_counter counters[0] ____cacheline_aligned; + struct ebt_counter counters[] ____cacheline_aligned; }; struct ebt_table { diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index e9e1ed74cdf1..b394bd4f68a3 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -76,7 +76,7 @@ struct compat_ipt_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; /* Helper functions */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 78ab959c4575..8225f7821a29 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -43,7 +43,7 @@ struct compat_ip6t_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; static inline struct xt_entry_target * -- cgit v1.2.3 From f6554187855a4ddce235d22cc1486c118846b592 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 14 Mar 2020 10:09:53 +0000 Subject: net: mii: convert mii_lpa_to_ethtool_lpa_x() to linkmode variant Add a LPA to linkmode decoder for 1000BASE-X protocols; this decoder only provides the modify semantics similar to other such decoders. This replaces the unused mii_lpa_to_ethtool_lpa_x() helper. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mii.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 18c6208f56fc..309de4a3e6e7 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -354,24 +354,6 @@ static inline u32 mii_adv_to_ethtool_adv_x(u32 adv) return result; } -/** - * mii_lpa_to_ethtool_lpa_x - * @adv: value of the MII_LPA register - * - * A small helper function that translates MII_LPA - * bits, when in 1000Base-X mode, to ethtool - * LP advertisement settings. - */ -static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) -{ - u32 result = 0; - - if (lpa & LPA_LPACK) - result |= ADVERTISED_Autoneg; - - return result | mii_adv_to_ethtool_adv_x(lpa); -} - /** * mii_lpa_mod_linkmode_adv_sgmii * @lp_advertising: pointer to destination link mode. @@ -535,6 +517,25 @@ static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) return lcl_adv; } +/** + * mii_lpa_mod_linkmode_x - decode the link partner's config_reg to linkmodes + * @linkmodes: link modes array + * @lpa: config_reg word from link partner + * @fd_bit: link mode for 1000XFULL bit + */ +static inline void mii_lpa_mod_linkmode_x(unsigned long *linkmodes, u16 lpa, + int fd_bit) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, linkmodes, + lpa & LPA_LPACK); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, linkmodes, + lpa & LPA_1000XPAUSE); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, linkmodes, + lpa & LPA_1000XPAUSE_ASYM); + linkmode_mod_bit(fd_bit, linkmodes, + lpa & LPA_1000XFULL); +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From a9f28eba6eaab651e4cec6d936a107b03d61754a Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 14 Mar 2020 10:09:58 +0000 Subject: net: mii: add linkmode_adv_to_mii_adv_x() Add a helper to convert a linkmode advertisement to a clause 37 advertisement value for 1000base-x and 2500base-x. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mii.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 309de4a3e6e7..219b93cad1dd 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -536,6 +536,26 @@ static inline void mii_lpa_mod_linkmode_x(unsigned long *linkmodes, u16 lpa, lpa & LPA_1000XFULL); } +/** + * linkmode_adv_to_mii_adv_x - encode a linkmode to config_reg + * @linkmodes: linkmodes + * @fd_bit: full duplex bit + */ +static inline u16 linkmode_adv_to_mii_adv_x(const unsigned long *linkmodes, + int fd_bit) +{ + u16 adv = 0; + + if (linkmode_test_bit(fd_bit, linkmodes)) + adv |= ADVERTISE_1000XFULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, linkmodes)) + adv |= ADVERTISE_1000XPAUSE; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, linkmodes)) + adv |= ADVERTISE_1000XPSE_ASYM; + + return adv; +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From b030f194aed290705426c62e501201c0739405c5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Mar 2020 12:59:01 +0100 Subject: netfilter: Rename ingress hook include file Prepare for addition of a netfilter egress hook by renaming to . The egress hook also necessitates a refactoring of the include file, but that is done in a separate commit to ease reviewing. No functional change intended. Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 58 --------------------------------------- include/linux/netfilter_netdev.h | 58 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 58 deletions(-) delete mode 100644 include/linux/netfilter_ingress.h create mode 100644 include/linux/netfilter_netdev.h (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h deleted file mode 100644 index a13774be2eb5..000000000000 --- a/include/linux/netfilter_ingress.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_INGRESS_H_ -#define _NETFILTER_INGRESS_H_ - -#include -#include - -#ifdef CONFIG_NETFILTER_INGRESS -static inline bool nf_hook_ingress_active(const struct sk_buff *skb) -{ -#ifdef CONFIG_JUMP_LABEL - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) - return false; -#endif - return rcu_access_pointer(skb->dev->nf_hooks_ingress); -} - -/* caller must hold rcu_read_lock */ -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); - struct nf_hook_state state; - int ret; - - /* Must recheck the ingress hook head, in the event it became NULL - * after the check in nf_hook_ingress_active evaluated to true. - */ - if (unlikely(!e)) - return 0; - - nf_hook_state_init(&state, NF_NETDEV_INGRESS, - NFPROTO_NETDEV, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e, 0); - if (ret == 0) - return -1; - - return ret; -} - -static inline void nf_hook_ingress_init(struct net_device *dev) -{ - RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); -} -#else /* CONFIG_NETFILTER_INGRESS */ -static inline int nf_hook_ingress_active(struct sk_buff *skb) -{ - return 0; -} - -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - return 0; -} - -static inline void nf_hook_ingress_init(struct net_device *dev) {} -#endif /* CONFIG_NETFILTER_INGRESS */ -#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h new file mode 100644 index 000000000000..a13774be2eb5 --- /dev/null +++ b/include/linux/netfilter_netdev.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include +#include + +#ifdef CONFIG_NETFILTER_INGRESS +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return rcu_access_pointer(skb->dev->nf_hooks_ingress); +} + +/* caller must hold rcu_read_lock */ +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_state state; + int ret; + + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, NF_NETDEV_INGRESS, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + if (ret == 0) + return -1; + + return ret; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From 5418d3881e1f5d2cf9c1076eb8bd85770393a0e8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Mar 2020 12:59:02 +0100 Subject: netfilter: Generalize ingress hook Prepare for addition of a netfilter egress hook by generalizing the ingress hook introduced by commit e687ad60af09 ("netfilter: add netfilter ingress hook after handle_ing() under unique static key"). In particular, rename and refactor the ingress hook's static inlines such that they can be reused for an egress hook. No functional change intended. Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_netdev.h | 45 +++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index a13774be2eb5..49e26479642e 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -1,34 +1,37 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_INGRESS_H_ -#define _NETFILTER_INGRESS_H_ +#ifndef _NETFILTER_NETDEV_H_ +#define _NETFILTER_NETDEV_H_ #include #include -#ifdef CONFIG_NETFILTER_INGRESS -static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +#ifdef CONFIG_NETFILTER +static __always_inline bool nf_hook_netdev_active(enum nf_dev_hooks hooknum, + struct nf_hook_entries __rcu *hooks) { #ifdef CONFIG_JUMP_LABEL - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][hooknum])) return false; #endif - return rcu_access_pointer(skb->dev->nf_hooks_ingress); + return rcu_access_pointer(hooks); } /* caller must hold rcu_read_lock */ -static inline int nf_hook_ingress(struct sk_buff *skb) +static __always_inline int nf_hook_netdev(struct sk_buff *skb, + enum nf_dev_hooks hooknum, + struct nf_hook_entries __rcu *hooks) { - struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_entries *e = rcu_dereference(hooks); struct nf_hook_state state; int ret; - /* Must recheck the ingress hook head, in the event it became NULL - * after the check in nf_hook_ingress_active evaluated to true. + /* Must recheck the hook head, in the event it became NULL + * after the check in nf_hook_netdev_active evaluated to true. */ if (unlikely(!e)) return 0; - nf_hook_state_init(&state, NF_NETDEV_INGRESS, + nf_hook_state_init(&state, hooknum, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); ret = nf_hook_slow(skb, &state, e, 0); @@ -37,10 +40,26 @@ static inline int nf_hook_ingress(struct sk_buff *skb) return ret; } +#endif /* CONFIG_NETFILTER */ -static inline void nf_hook_ingress_init(struct net_device *dev) +static inline void nf_hook_netdev_init(struct net_device *dev) { +#ifdef CONFIG_NETFILTER_INGRESS RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +#endif +} + +#ifdef CONFIG_NETFILTER_INGRESS +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +{ + return nf_hook_netdev_active(NF_NETDEV_INGRESS, + skb->dev->nf_hooks_ingress); +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return nf_hook_netdev(skb, NF_NETDEV_INGRESS, + skb->dev->nf_hooks_ingress); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) @@ -52,7 +71,5 @@ static inline int nf_hook_ingress(struct sk_buff *skb) { return 0; } - -static inline void nf_hook_ingress_init(struct net_device *dev) {} #endif /* CONFIG_NETFILTER_INGRESS */ #endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From 8537f78647c072bdb1a5dbe32e1c7e5b13ff1258 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Mar 2020 12:59:03 +0100 Subject: netfilter: Introduce egress hook Commit e687ad60af09 ("netfilter: add netfilter ingress hook after handle_ing() under unique static key") introduced the ability to classify packets on ingress. Allow the same on egress. Position the hook immediately before a packet is handed to tc and then sent out on an interface, thereby mirroring the ingress order. This order allows marking packets in the netfilter egress hook and subsequently using the mark in tc. Another benefit of this order is consistency with a lot of existing documentation which says that egress tc is performed after netfilter hooks. Egress hooks already exist for the most common protocols, such as NF_INET_LOCAL_OUT or NF_ARP_OUT, and those are to be preferred because they are executed earlier during packet processing. However for more exotic protocols, there is currently no provision to apply netfilter on egress. A common workaround is to enslave the interface to a bridge and use ebtables, or to resort to tc. But when the ingress hook was introduced, consensus was that users should be given the choice to use netfilter or tc, whichever tool suits their needs best: https://lore.kernel.org/netdev/20150430153317.GA3230@salvia/ This hook is also useful for NAT46/NAT64, tunneling and filtering of locally generated af_packet traffic such as dhclient. There have also been occasional user requests for a netfilter egress hook in the past, e.g.: https://www.spinics.net/lists/netfilter/msg50038.html Performance measurements with pktgen surprisingly show a speedup rather than a slowdown with this commit: * Without this commit: Result: OK: 34240933(c34238375+d2558) usec, 100000000 (60byte,0frags) 2920481pps 1401Mb/sec (1401830880bps) errors: 0 * With this commit: Result: OK: 33997299(c33994193+d3106) usec, 100000000 (60byte,0frags) 2941410pps 1411Mb/sec (1411876800bps) errors: 0 * Without this commit + tc egress: Result: OK: 39022386(c39019547+d2839) usec, 100000000 (60byte,0frags) 2562631pps 1230Mb/sec (1230062880bps) errors: 0 * With this commit + tc egress: Result: OK: 37604447(c37601877+d2570) usec, 100000000 (60byte,0frags) 2659259pps 1276Mb/sec (1276444320bps) errors: 0 * With this commit + nft egress: Result: OK: 41436689(c41434088+d2600) usec, 100000000 (60byte,0frags) 2413320pps 1158Mb/sec (1158393600bps) errors: 0 Tested on a bare-metal Core i7-3615QM, each measurement was performed three times to verify that the numbers are stable. Commands to perform a measurement: modprobe pktgen echo "add_device lo@3" > /proc/net/pktgen/kpktgend_3 samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh -i 'lo@3' -n 100000000 Commands for testing tc egress: tc qdisc add dev lo clsact tc filter add dev lo egress protocol ip prio 1 u32 match ip dst 4.3.2.1/32 Commands for testing nft egress: nft add table netdev t nft add chain netdev t co \{ type filter hook egress device lo priority 0 \; \} nft add rule netdev t co ip daddr 4.3.2.1/32 drop All testing was performed on the loopback interface to avoid distorting measurements by the packet handling in the low-level Ethernet driver. Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 4 ++++ include/linux/netfilter_netdev.h | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 654808bfad83..15f1e32b430c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1751,6 +1751,7 @@ enum netdev_priv_flags { * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing + * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) @@ -2026,6 +2027,9 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; #endif +#ifdef CONFIG_NETFILTER_EGRESS + struct nf_hook_entries __rcu *nf_hooks_egress; +#endif #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index 49e26479642e..92d3611a782e 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -47,6 +47,9 @@ static inline void nf_hook_netdev_init(struct net_device *dev) #ifdef CONFIG_NETFILTER_INGRESS RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); #endif +#ifdef CONFIG_NETFILTER_EGRESS + RCU_INIT_POINTER(dev->nf_hooks_egress, NULL); +#endif } #ifdef CONFIG_NETFILTER_INGRESS @@ -72,4 +75,28 @@ static inline int nf_hook_ingress(struct sk_buff *skb) return 0; } #endif /* CONFIG_NETFILTER_INGRESS */ + +#ifdef CONFIG_NETFILTER_EGRESS +static inline bool nf_hook_egress_active(const struct sk_buff *skb) +{ + return nf_hook_netdev_active(NF_NETDEV_EGRESS, + skb->dev->nf_hooks_egress); +} + +static inline int nf_hook_egress(struct sk_buff *skb) +{ + return nf_hook_netdev(skb, NF_NETDEV_EGRESS, + skb->dev->nf_hooks_egress); +} +#else /* CONFIG_NETFILTER_EGRESS */ +static inline int nf_hook_egress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_egress(struct sk_buff *skb) +{ + return 0; +} +#endif /* CONFIG_NETFILTER_EGRESS */ #endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From 613f53fe09a27f928a7d05132e1a74b5136e8f04 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 24 Feb 2020 16:59:54 +0200 Subject: net/mlx5: Eswitch, enable forwarding back to uplink port Add dependencny on cap termination_table_raw_traffic to allow non encapsulated packets received from uplink to be forwarded back to the received uplink port. Refactor the conditions into a separate function. Signed-off-by: Eli Cohen Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2bd920965bd3..cc55cee3b53c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -416,7 +416,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 termination_table[0x1]; u8 reformat_and_fwd_to_table[0x1]; u8 reserved_at_1a[0x6]; - u8 reserved_at_20[0x2]; + u8 termination_table_raw_traffic[0x1]; + u8 reserved_at_21[0x1]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; u8 max_modify_header_actions[0x8]; -- cgit v1.2.3 From 9000edb71ab29d184aa33f5a77fa6e52d8812bb9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Mar 2020 13:47:12 -0700 Subject: net: ethtool: require drivers to set supported_coalesce_params Now that all in-tree drivers have been updated we can make the supported_coalesce_params mandatory. To save debugging time in case some driver was missed (or is out of tree) add a warning when netdev is registered with set_coalesce but without supported_coalesce_params. Signed-off-by: Jakub Kicinski Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index be355f37337d..c1d379bf6ee1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -458,6 +458,8 @@ struct ethtool_ops { struct ethtool_stats *, u64 *); }; +int ethtool_check_ops(const struct ethtool_ops *ops); + struct ethtool_rx_flow_rule { struct flow_rule *rule; unsigned long priv[0]; -- cgit v1.2.3 From 9010f9deb000edce823cb79345f137742ccffa19 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 16 Mar 2020 22:32:33 +0100 Subject: net: phy: improve phy_driver callback handle_interrupt did_interrupt() clears the interrupt, therefore handle_interrupt() can not check which event triggered the interrupt. To overcome this constraint and allow more flexibility for customer interrupt handlers, let's decouple handle_interrupt() from parts of the phylib interrupt handling. Custom interrupt handlers now have to implement the did_interrupt() functionality in handle_interrupt() if needed. Fortunately we have just one custom interrupt handler so far (in the mscc PHY driver), convert it to the changed API. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b872aed8ba6..cb5a2182ba6d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -568,7 +569,7 @@ struct phy_driver { int (*did_interrupt)(struct phy_device *phydev); /* Override default interrupt handling */ - int (*handle_interrupt)(struct phy_device *phydev); + irqreturn_t (*handle_interrupt)(struct phy_device *phydev); /* Clears up any memory if needed */ void (*remove)(struct phy_device *phydev); -- cgit v1.2.3 From 6cc7cf8125b3d086cd80c96e02edb6f4ab9b20fa Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Mar 2020 14:52:31 +0000 Subject: net: mdiobus: add APIs for modifying a MDIO device register Add APIs for modifying a MDIO device register, similar to the existing phy_modify() group of functions, but at mdiobus level instead. Adapt __phy_modify_changed() to use the new mdiobus level helper. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio.h | 4 ++++ include/linux/phy.h | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index a7604248777b..917e4bb2ed71 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -316,11 +316,15 @@ static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, + u16 mask, u16 set); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, + u16 set); int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); diff --git a/include/linux/phy.h b/include/linux/phy.h index cb5a2182ba6d..36d9dea04016 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -754,6 +754,25 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val) val); } +/** + * __phy_modify_changed() - Convenience function for modifying a PHY register + * @phydev: a pointer to a &struct phy_device + * @regnum: register number + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + * + * Unlocked helper function which allows a PHY register to be modified as + * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, + u16 mask, u16 set) +{ + return __mdiobus_modify_changed(phydev->mdio.bus, phydev->mdio.addr, + regnum, mask, set); +} + /** * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. -- cgit v1.2.3 From 74db1c18d80aaacdc42e6d00d7aca2bb5c97c7ea Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Mar 2020 14:52:36 +0000 Subject: net: phylink: pcs: add 802.3 clause 22 helpers Implement helpers for PCS accessed via the MII bus using 802.3 clause 22 cycles, conforming to 802.3 clause 37 and Cisco SGMII specifications for the advertisement word. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2180eb1aa254..de591c2fb37e 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -317,4 +317,10 @@ int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); void phylink_set_port_modes(unsigned long *bits); void phylink_helper_basex_speed(struct phylink_link_state *state); +void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state); +int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, + const struct phylink_link_state *state); +void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); + #endif -- cgit v1.2.3 From b8679ef8bedfe2bae90c97bc4c8a1826cfd98bba Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Mar 2020 14:52:41 +0000 Subject: net: phylink: pcs: add 802.3 clause 45 helpers Implement helpers for PCS accessed via the MII bus using 802.3 clause 45 cycles for 10GBASE-R. Only link up/down is supported, 10G full duplex is assumed. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index de591c2fb37e..8fa6df3b881b 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -323,4 +323,6 @@ int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, const struct phylink_link_state *state); void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); +void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state); #endif -- cgit v1.2.3 From 357b6cc5834eabc1be7c28a9faae7da061df097d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 18 Mar 2020 10:33:22 +0100 Subject: netfilter: revert introduction of egress hook This reverts the following commits: 8537f78647c0 ("netfilter: Introduce egress hook") 5418d3881e1f ("netfilter: Generalize ingress hook") b030f194aed2 ("netfilter: Rename ingress hook include file") >From the discussion in [0], the author's main motivation to add a hook in fast path is for an out of tree kernel module, which is a red flag to begin with. Other mentioned potential use cases like NAT{64,46} is on future extensions w/o concrete code in the tree yet. Revert as suggested [1] given the weak justification to add more hooks to critical fast-path. [0] https://lore.kernel.org/netdev/cover.1583927267.git.lukas@wunner.de/ [1] https://lore.kernel.org/netdev/20200318.011152.72770718915606186.davem@davemloft.net/ Signed-off-by: Daniel Borkmann Cc: David Miller Cc: Pablo Neira Ayuso Cc: Alexei Starovoitov Nacked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 -- include/linux/netfilter_ingress.h | 58 ++++++++++++++++++++++ include/linux/netfilter_netdev.h | 102 -------------------------------------- 3 files changed, 58 insertions(+), 106 deletions(-) create mode 100644 include/linux/netfilter_ingress.h delete mode 100644 include/linux/netfilter_netdev.h (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 15f1e32b430c..654808bfad83 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1751,7 +1751,6 @@ enum netdev_priv_flags { * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing - * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) @@ -2027,9 +2026,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; #endif -#ifdef CONFIG_NETFILTER_EGRESS - struct nf_hook_entries __rcu *nf_hooks_egress; -#endif #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h new file mode 100644 index 000000000000..a13774be2eb5 --- /dev/null +++ b/include/linux/netfilter_ingress.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include +#include + +#ifdef CONFIG_NETFILTER_INGRESS +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return rcu_access_pointer(skb->dev->nf_hooks_ingress); +} + +/* caller must hold rcu_read_lock */ +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_state state; + int ret; + + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, NF_NETDEV_INGRESS, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + if (ret == 0) + return -1; + + return ret; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h deleted file mode 100644 index 92d3611a782e..000000000000 --- a/include/linux/netfilter_netdev.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_NETDEV_H_ -#define _NETFILTER_NETDEV_H_ - -#include -#include - -#ifdef CONFIG_NETFILTER -static __always_inline bool nf_hook_netdev_active(enum nf_dev_hooks hooknum, - struct nf_hook_entries __rcu *hooks) -{ -#ifdef CONFIG_JUMP_LABEL - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][hooknum])) - return false; -#endif - return rcu_access_pointer(hooks); -} - -/* caller must hold rcu_read_lock */ -static __always_inline int nf_hook_netdev(struct sk_buff *skb, - enum nf_dev_hooks hooknum, - struct nf_hook_entries __rcu *hooks) -{ - struct nf_hook_entries *e = rcu_dereference(hooks); - struct nf_hook_state state; - int ret; - - /* Must recheck the hook head, in the event it became NULL - * after the check in nf_hook_netdev_active evaluated to true. - */ - if (unlikely(!e)) - return 0; - - nf_hook_state_init(&state, hooknum, - NFPROTO_NETDEV, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e, 0); - if (ret == 0) - return -1; - - return ret; -} -#endif /* CONFIG_NETFILTER */ - -static inline void nf_hook_netdev_init(struct net_device *dev) -{ -#ifdef CONFIG_NETFILTER_INGRESS - RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); -#endif -#ifdef CONFIG_NETFILTER_EGRESS - RCU_INIT_POINTER(dev->nf_hooks_egress, NULL); -#endif -} - -#ifdef CONFIG_NETFILTER_INGRESS -static inline bool nf_hook_ingress_active(const struct sk_buff *skb) -{ - return nf_hook_netdev_active(NF_NETDEV_INGRESS, - skb->dev->nf_hooks_ingress); -} - -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - return nf_hook_netdev(skb, NF_NETDEV_INGRESS, - skb->dev->nf_hooks_ingress); -} -#else /* CONFIG_NETFILTER_INGRESS */ -static inline int nf_hook_ingress_active(struct sk_buff *skb) -{ - return 0; -} - -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - return 0; -} -#endif /* CONFIG_NETFILTER_INGRESS */ - -#ifdef CONFIG_NETFILTER_EGRESS -static inline bool nf_hook_egress_active(const struct sk_buff *skb) -{ - return nf_hook_netdev_active(NF_NETDEV_EGRESS, - skb->dev->nf_hooks_egress); -} - -static inline int nf_hook_egress(struct sk_buff *skb) -{ - return nf_hook_netdev(skb, NF_NETDEV_EGRESS, - skb->dev->nf_hooks_egress); -} -#else /* CONFIG_NETFILTER_EGRESS */ -static inline int nf_hook_egress_active(struct sk_buff *skb) -{ - return 0; -} - -static inline int nf_hook_egress(struct sk_buff *skb) -{ - return 0; -} -#endif /* CONFIG_NETFILTER_EGRESS */ -#endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From 8511d72f14bc7b94a96bb2990615e1cd7c1dd21e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 19 Mar 2020 15:47:41 +0100 Subject: sysfs: fix static inline declaration of sysfs_groups_change_owner() The CONFIG_SYSFS declaration of sysfs_group_change_owner() is different from the !CONFIG_SYSFS version and thus causes build failurs when !CONFIG_SYSFS is set. Reported-by: Stephen Rothwell Fixes: 303a42769c4c ("sysfs: add sysfs_group{s}_change_owner()") Signed-off-by: Christian Brauner Reported-by: Randy Dunlap Acked-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller --- include/linux/sysfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9e531ec76274..4beb51009b62 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -562,8 +562,8 @@ static inline int sysfs_groups_change_owner(struct kobject *kobj, } static inline int sysfs_group_change_owner(struct kobject *kobj, - const struct attribute_group **groups, - kuid_t kuid, kgid_t kgid) + const struct attribute_group *groups, + kuid_t kuid, kgid_t kgid) { return 0; } -- cgit v1.2.3 From 5eee3bb7103f4a66e4b90c2817f5e72509a2a607 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 20 Mar 2020 17:51:38 +0100 Subject: net: phy: add and use phy_check_downshift So far PHY drivers have to check whether a downshift occurred to be able to notify the user. To make life of drivers authors a little bit easier move the downshift notification to phylib. phy_check_downshift() compares the highest mutually advertised speed with the actual value of phydev->speed (typically read by the PHY driver from a vendor-specific register) to detect a downshift. v2: - Add downshift hint to phy_print_status Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 36d9dea04016..99b5e3c4b621 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -365,6 +365,7 @@ struct macsec_ops; * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. + * downshifted_rate: Set true if link speed has been downshifted. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * irq: IRQ number of the PHY's interrupt (-1 if none) @@ -405,6 +406,7 @@ struct phy_device { unsigned suspended_by_mdio_bus:1; unsigned sysfs_links:1; unsigned loopback_enabled:1; + unsigned downshifted_rate:1; unsigned autoneg:1; /* The most recently read link state */ @@ -698,6 +700,7 @@ static inline bool phy_is_started(struct phy_device *phydev) void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); +void phy_check_downshift(struct phy_device *phydev); /** * phy_read - Convenience function for reading a given PHY register -- cgit v1.2.3 From 5f5323a14cad19323060a8cbf9d96f2280a462dd Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 23 Mar 2020 23:05:51 +0800 Subject: iopoll: introduce read_poll_timeout macro this macro is an extension of readx_poll_timeout macro. the accessor function op just supports only one parameter in the readx_poll_timeout macro, but this macro can supports multiple variable parameters for it. so functions like phy_read(struct phy_device *phydev, u32 regnum) and phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) can also use this poll timeout core. and also expand it can sleep some time before read operation. Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- include/linux/iopoll.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 35e15dfd4155..70f89b389648 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -13,6 +13,50 @@ #include #include +/** + * read_poll_timeout - Periodically poll an address until a condition is + * met or a timeout occurs + * @op: accessor function (takes @args as its arguments) + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 + * tight-loops). Should be less than ~20ms since usleep_range + * is used (see Documentation/timers/timers-howto.rst). + * @timeout_us: Timeout in us, 0 means never timeout + * @sleep_before_read: if it is true, sleep @sleep_us before read. + * @args: arguments for @op poll + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ + sleep_before_read, args...) \ +({ \ + u64 __timeout_us = (timeout_us); \ + unsigned long __sleep_us = (sleep_us); \ + ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ + might_sleep_if((__sleep_us) != 0); \ + if (sleep_before_read && __sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ + for (;;) { \ + (val) = op(args); \ + if (cond) \ + break; \ + if (__timeout_us && \ + ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = op(args); \ + break; \ + } \ + if (__sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + /** * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs * @op: accessor function (takes @addr as its only argument) -- cgit v1.2.3 From eaa6b01024a74ab5f3064f17dd88596284f497c4 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 23 Mar 2020 23:05:52 +0800 Subject: iopoll: redefined readx_poll_timeout macro to simplify the code redefined readx_poll_timeout macro by read_poll_timeout to simplify the code. Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- include/linux/iopoll.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 70f89b389648..cb20c733b15a 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -76,25 +76,7 @@ * macros defined below rather than this macro directly. */ #define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ -({ \ - u64 __timeout_us = (timeout_us); \ - unsigned long __sleep_us = (sleep_us); \ - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ - might_sleep_if((__sleep_us) != 0); \ - for (;;) { \ - (val) = op(addr); \ - if (cond) \ - break; \ - if (__timeout_us && \ - ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = op(addr); \ - break; \ - } \ - if (__sleep_us) \ - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ - } \ - (cond) ? 0 : -ETIMEDOUT; \ -}) + read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr) /** * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs -- cgit v1.2.3 From bd971ff0b73927b91f4520621d49d3a801ee4837 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 23 Mar 2020 23:05:53 +0800 Subject: net: phy: introduce phy_read_mmd_poll_timeout macro it is sometimes necessary to poll a phy register by phy_read_mmd() function until its value satisfies some condition. introduce phy_read_mmd_poll_timeout() macros that do this. Suggested-by: Andrew Lunn Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- include/linux/phy.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 99b5e3c4b621..3984f375126e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -787,6 +788,19 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); +#define phy_read_mmd_poll_timeout(phydev, devaddr, regnum, val, cond, \ + sleep_us, timeout_us, sleep_before_read) \ +({ \ + int __ret = read_poll_timeout(phy_read_mmd, val, (cond) || val < 0, \ + sleep_us, timeout_us, sleep_before_read, \ + phydev, devaddr, regnum); \ + if (val < 0) \ + __ret = val; \ + if (__ret) \ + phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \ + __ret; \ +}) + /** * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. -- cgit v1.2.3 From fcbd30d09ba05389cb40cc1769b565df62aead35 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 23 Mar 2020 23:05:57 +0800 Subject: net: phy: introduce phy_read_poll_timeout macro it is sometimes necessary to poll a phy register by phy_read() function until its value satisfies some condition. introduce phy_read_poll_timeout() macros that do this. Suggested-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- include/linux/phy.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3984f375126e..2432ca463ddc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -717,6 +717,19 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum) return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, regnum); } +#define phy_read_poll_timeout(phydev, regnum, val, cond, sleep_us, \ + timeout_us, sleep_before_read) \ +({ \ + int __ret = read_poll_timeout(phy_read, val, (cond) || val < 0, \ + sleep_us, timeout_us, sleep_before_read, phydev, regnum); \ + if (val < 0) \ + __ret = val; \ + if (__ret) \ + phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \ + __ret; \ +}) + + /** * __phy_read - convenience function for reading a given PHY register * @phydev: the phy_device struct -- cgit v1.2.3 From 5908220b2b3d6918f88cd645a39e1dcb84d1c5d9 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 25 Mar 2020 15:52:30 +0300 Subject: net: introduce the MACSEC netdev feature This patch introduce a new netdev feature, which will be used by drivers to state they can perform MACsec transformations in hardware. The patchset was gathered by Mark, macsec functinality itself was implemented by Dmitry, Mark and Pavel Belous. Signed-off-by: Antoine Tenart Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 34d050bb1ae6..9d53c5ad272c 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -83,6 +83,8 @@ enum { NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */ NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */ + NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */ + /* * Add your fresh new feature above and remember to update * netdev_features_strings[] in net/core/ethtool.c and maybe @@ -154,6 +156,7 @@ enum { #define NETIF_F_HW_TLS_RX __NETIF_F(HW_TLS_RX) #define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST) #define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) +#define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC) /* Finds the next feature with the highest number of the range of start till 0. */ -- cgit v1.2.3 From 30e9bb8472f4454d0544020574bb03d96ffa0e52 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 25 Mar 2020 15:52:31 +0300 Subject: net: add a reference to MACsec ops in net_device This patch adds a reference to MACsec ops to the net_device structure, allowing net device drivers to implement offloading operations for MACsec. Signed-off-by: Antoine Tenart Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 654808bfad83..b521500b244d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -53,6 +53,8 @@ struct netpoll_info; struct device; struct phy_device; struct dsa_port; +struct macsec_context; +struct macsec_ops; struct sfp_bus; /* 802.11 specific */ @@ -1819,6 +1821,8 @@ enum netdev_priv_flags { * that follow this device when it is moved * to another network namespace. * + * @macsec_ops: MACsec offloading ops + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2113,6 +2117,11 @@ struct net_device { unsigned wol_enabled:1; struct list_head net_notifier_list; + +#if IS_ENABLED(CONFIG_MACSEC) + /* MACsec management functions */ + const struct macsec_ops *macsec_ops; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 16efafa31bc1fd7c8646dccbf30eeef3ad495d5a Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 27 Mar 2020 15:05:32 +0530 Subject: PCI: Add new PCI_VPD_RO_KEYWORD_SERIALNO macro This patch adds a new macro for serial number keyword. Acked-by: Bjorn Helgaas Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index fc54b8922e66..a048fba311d2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2184,6 +2184,7 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask); #define PCI_VPD_INFO_FLD_HDR_SIZE 3 #define PCI_VPD_RO_KEYWORD_PARTNO "PN" +#define PCI_VPD_RO_KEYWORD_SERIALNO "SN" #define PCI_VPD_RO_KEYWORD_MFR_ID "MN" #define PCI_VPD_RO_KEYWORD_VENDOR0 "V0" #define PCI_VPD_RO_KEYWORD_CHKSUM "RV" -- cgit v1.2.3 From ab41ca3455a208392ce95f4086d5708dc37bff86 Mon Sep 17 00:00:00 2001 From: Murali Krishna Policharla Date: Fri, 27 Mar 2020 21:55:40 +0200 Subject: net: phy: bcm7xx: add jumbo frame configuration to PHY The BCM7XX PHY family requires special configuration to pass jumbo frames. Do that during initial PHY setup. Signed-off-by: Murali Krishna Policharla Reviewed-by: Scott Branden Signed-off-by: Vladimir Oltean Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index b475e7f20d28..6462c5447872 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -79,6 +79,7 @@ #define MII_BCM54XX_ECR 0x10 /* BCM54xx extended control register */ #define MII_BCM54XX_ECR_IM 0x1000 /* Interrupt mask */ #define MII_BCM54XX_ECR_IF 0x0800 /* Interrupt force */ +#define MII_BCM54XX_ECR_FIFOE 0x0001 /* FIFO elasticity */ #define MII_BCM54XX_ESR 0x11 /* BCM54xx extended status register */ #define MII_BCM54XX_ESR_IS 0x1000 /* Interrupt status */ @@ -119,6 +120,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x00 #define MII_BCM54XX_AUXCTL_ACTL_TX_6DB 0x0400 #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 +#define MII_BCM54XX_AUXCTL_ACTL_EXT_PKT_LEN 0x4000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 -- cgit v1.2.3 From f318903c0bf42448b4c884732df2bbb0ef7a2284 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Mar 2020 16:58:52 +0100 Subject: bpf: Add netns cookie and enable it for bpf cgroup hooks In Cilium we're mainly using BPF cgroup hooks today in order to implement kube-proxy free Kubernetes service translation for ClusterIP, NodePort (*), ExternalIP, and LoadBalancer as well as HostPort mapping [0] for all traffic between Cilium managed nodes. While this works in its current shape and avoids packet-level NAT for inter Cilium managed node traffic, there is one major limitation we're facing today, that is, lack of netns awareness. In Kubernetes, the concept of Pods (which hold one or multiple containers) has been built around network namespaces, so while we can use the global scope of attaching to root BPF cgroup hooks also to our advantage (e.g. for exposing NodePort ports on loopback addresses), we also have the need to differentiate between initial network namespaces and non-initial one. For example, ExternalIP services mandate that non-local service IPs are not to be translated from the host (initial) network namespace as one example. Right now, we have an ugly work-around in place where non-local service IPs for ExternalIP services are not xlated from connect() and friends BPF hooks but instead via less efficient packet-level NAT on the veth tc ingress hook for Pod traffic. On top of determining whether we're in initial or non-initial network namespace we also have a need for a socket-cookie like mechanism for network namespaces scope. Socket cookies have the nice property that they can be combined as part of the key structure e.g. for BPF LRU maps without having to worry that the cookie could be recycled. We are planning to use this for our sessionAffinity implementation for services. Therefore, add a new bpf_get_netns_cookie() helper which would resolve both use cases at once: bpf_get_netns_cookie(NULL) would provide the cookie for the initial network namespace while passing the context instead of NULL would provide the cookie from the application's network namespace. We're using a hole, so no size increase; the assignment happens only once. Therefore this allows for a comparison on initial namespace as well as regular cookie usage as we have today with socket cookies. We could later on enable this helper for other program types as well as we would see need. (*) Both externalTrafficPolicy={Local|Cluster} types [0] https://github.com/cilium/cilium/blob/master/bpf/bpf_sock.c Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/c47d2346982693a9cf9da0e12690453aded4c788.1585323121.git.daniel@iogearbox.net --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bdb981c204fa..78046c570596 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -233,6 +233,7 @@ enum bpf_arg_type { ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ + ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ -- cgit v1.2.3 From 0f09abd105da6c37713d2b253730a86cb45e127a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Mar 2020 16:58:54 +0100 Subject: bpf: Enable bpf cgroup hooks to retrieve cgroup v2 and ancestor id Enable the bpf_get_current_cgroup_id() helper for connect(), sendmsg(), recvmsg() and bind-related hooks in order to retrieve the cgroup v2 context which can then be used as part of the key for BPF map lookups, for example. Given these hooks operate in process context 'current' is always valid and pointing to the app that is performing mentioned syscalls if it's subject to a v2 cgroup. Also with same motivation of commit 7723628101aa ("bpf: Introduce bpf_skb_ancestor_cgroup_id helper") enable retrieval of ancestor from current so the cgroup id can be used for policy lookups which can then forbid connect() / bind(), for example. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/d2a7ef42530ad299e3cbb245e6c12374b72145ef.1585323121.git.daniel@iogearbox.net --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 78046c570596..372708eeaecd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1501,6 +1501,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; +extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; -- cgit v1.2.3 From 92234c8f15c8d96ad7e52afdc5994cba6be68eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 25 Mar 2020 18:23:26 +0100 Subject: xdp: Support specifying expected existing program when attaching XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it is currently possible for userspace to specify that an existing XDP program should not be replaced when attaching to an interface, there is no mechanism to safely replace a specific XDP program with another. This patch adds a new netlink attribute, IFLA_XDP_EXPECTED_FD, which can be set along with IFLA_XDP_FD. If set, the kernel will check that the program currently loaded on the interface matches the expected one, and fail the operation if it does not. This corresponds to a 'cmpxchg' memory operation. Setting the new attribute with a negative value means that no program is expected to be attached, which corresponds to setting the UPDATE_IF_NOEXIST flag. A new companion flag, XDP_FLAGS_REPLACE, is also added to explicitly request checking of the EXPECTED_FD attribute. This is needed for userspace to discover whether the kernel supports the new attribute. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/158515700640.92963.3551295145441017022.stgit@toke.dk --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 654808bfad83..b503d468f0df 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3768,7 +3768,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, u32 flags); + int fd, int expected_fd, u32 flags); u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, enum bpf_netdev_command cmd); int xdp_umem_query(struct net_device *dev, u16 queue_id); -- cgit v1.2.3 From fc611f47f2188ade2b48ff6902d5cce8baac0c58 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 29 Mar 2020 01:43:49 +0100 Subject: bpf: Introduce BPF_PROG_TYPE_LSM Introduce types and configs for bpf programs that can be attached to LSM hooks. The programs can be enabled by the config option CONFIG_BPF_LSM. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Reviewed-by: Florent Revest Reviewed-by: Thomas Garnier Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Acked-by: James Morris Link: https://lore.kernel.org/bpf/20200329004356.27286-2-kpsingh@chromium.org --- include/linux/bpf.h | 3 +++ include/linux/bpf_types.h | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 372708eeaecd..3bde59a8453b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1515,6 +1515,9 @@ extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; +const struct bpf_func_proto *bpf_tracing_func_proto( + enum bpf_func_id func_id, const struct bpf_prog *prog); + /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index c81d4ece79a4..ba0c2d56f8a3 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -70,6 +70,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops, void *, void *) BPF_PROG_TYPE(BPF_PROG_TYPE_EXT, bpf_extension, void *, void *) +#ifdef CONFIG_BPF_LSM +BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, + void *, void *) +#endif /* CONFIG_BPF_LSM */ #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) -- cgit v1.2.3 From 98e828a0650f348be85728c69875260cf78069e6 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 29 Mar 2020 01:43:50 +0100 Subject: security: Refactor declaration of LSM hooks The information about the different types of LSM hooks is scattered in two locations i.e. union security_list_options and struct security_hook_heads. Rather than duplicating this information even further for BPF_PROG_TYPE_LSM, define all the hooks with the LSM_HOOK macro in lsm_hook_defs.h which is then used to generate all the data structures required by the LSM framework. The LSM hooks are defined as: LSM_HOOK(, , , args...) with acccessible in security.c as: LSM_RET_DEFAULT() Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Reviewed-by: Florent Revest Reviewed-by: Kees Cook Reviewed-by: Casey Schaufler Acked-by: James Morris Link: https://lore.kernel.org/bpf/20200329004356.27286-3-kpsingh@chromium.org --- include/linux/lsm_hook_defs.h | 381 +++++++++++++++++++++++++ include/linux/lsm_hooks.h | 628 +----------------------------------------- 2 files changed, 393 insertions(+), 616 deletions(-) create mode 100644 include/linux/lsm_hook_defs.h (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h new file mode 100644 index 000000000000..9cd4455528e5 --- /dev/null +++ b/include/linux/lsm_hook_defs.h @@ -0,0 +1,381 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Linux Security Module Hook declarations. + * + * Copyright (C) 2001 WireX Communications, Inc + * Copyright (C) 2001 Greg Kroah-Hartman + * Copyright (C) 2001 Networks Associates Technology, Inc + * Copyright (C) 2001 James Morris + * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) + * Copyright (C) 2015 Intel Corporation. + * Copyright (C) 2015 Casey Schaufler + * Copyright (C) 2016 Mellanox Techonologies + * Copyright (C) 2020 Google LLC. + */ + +/* + * The macro LSM_HOOK is used to define the data structures required by the + * the LSM framework using the pattern: + * + * LSM_HOOK(, , , args...) + * + * struct security_hook_heads { + * #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; + * #include + * #undef LSM_HOOK + * }; + */ +LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr) +LSM_HOOK(int, 0, binder_transaction, struct task_struct *from, + struct task_struct *to) +LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from, + struct task_struct *to) +LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from, + struct task_struct *to, struct file *file) +LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, + unsigned int mode) +LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) +LSM_HOOK(int, 0, capget, struct task_struct *target, kernel_cap_t *effective, + kernel_cap_t *inheritable, kernel_cap_t *permitted) +LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old, + const kernel_cap_t *effective, const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) +LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns, + int cap, unsigned int opts) +LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, struct super_block *sb) +LSM_HOOK(int, 0, quota_on, struct dentry *dentry) +LSM_HOOK(int, 0, syslog, int type) +LSM_HOOK(int, 0, settime, const struct timespec64 *ts, + const struct timezone *tz) +LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 0, bprm_set_creds, struct linux_binprm *bprm) +LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) +LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, + struct fs_context *src_sc) +LSM_HOOK(int, 0, fs_context_parse_param, struct fs_context *fc, + struct fs_parameter *param) +LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) +LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) +LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) +LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) +LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) +LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb) +LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) +LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry) +LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path, + const char *type, unsigned long flags, void *data) +LSM_HOOK(int, 0, sb_umount, struct vfsmount *mnt, int flags) +LSM_HOOK(int, 0, sb_pivotroot, const struct path *old_path, + const struct path *new_path) +LSM_HOOK(int, 0, sb_set_mnt_opts, struct super_block *sb, void *mnt_opts, + unsigned long kern_flags, unsigned long *set_kern_flags) +LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb, + struct super_block *newsb, unsigned long kern_flags, + unsigned long *set_kern_flags) +LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val, + int len, void **mnt_opts) +LSM_HOOK(int, 0, move_mount, const struct path *from_path, + const struct path *to_path) +LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, + int mode, const struct qstr *name, void **ctx, u32 *ctxlen) +LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, + struct qstr *name, const struct cred *old, struct cred *new) + +#ifdef CONFIG_SECURITY_PATH +LSM_HOOK(int, 0, path_unlink, const struct path *dir, struct dentry *dentry) +LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry, + umode_t mode) +LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry) +LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry, + umode_t mode, unsigned int dev) +LSM_HOOK(int, 0, path_truncate, const struct path *path) +LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry, + const char *old_name) +LSM_HOOK(int, 0, path_link, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry) +LSM_HOOK(int, 0, path_rename, const struct path *old_dir, + struct dentry *old_dentry, const struct path *new_dir, + struct dentry *new_dentry) +LSM_HOOK(int, 0, path_chmod, const struct path *path, umode_t mode) +LSM_HOOK(int, 0, path_chown, const struct path *path, kuid_t uid, kgid_t gid) +LSM_HOOK(int, 0, path_chroot, const struct path *path) +#endif /* CONFIG_SECURITY_PATH */ + +/* Needed for inode based security check */ +LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask, + unsigned int obj_type) +LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) +LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) +LSM_HOOK(int, 0, inode_init_security, struct inode *inode, + struct inode *dir, const struct qstr *qstr, const char **name, + void **value, size_t *len) +LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry, + umode_t mode) +LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry) +LSM_HOOK(int, 0, inode_symlink, struct inode *dir, struct dentry *dentry, + const char *old_name) +LSM_HOOK(int, 0, inode_mkdir, struct inode *dir, struct dentry *dentry, + umode_t mode) +LSM_HOOK(int, 0, inode_rmdir, struct inode *dir, struct dentry *dentry) +LSM_HOOK(int, 0, inode_mknod, struct inode *dir, struct dentry *dentry, + umode_t mode, dev_t dev) +LSM_HOOK(int, 0, inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry) +LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode, + bool rcu) +LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask) +LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr) +LSM_HOOK(int, 0, inode_getattr, const struct path *path) +LSM_HOOK(int, 0, inode_setxattr, struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +LSM_HOOK(void, LSM_RET_VOID, inode_post_setxattr, struct dentry *dentry, + const char *name, const void *value, size_t size, int flags) +LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) +LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) +LSM_HOOK(int, 0, inode_removexattr, struct dentry *dentry, const char *name) +LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) +LSM_HOOK(int, 0, inode_killpriv, struct dentry *dentry) +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct inode *inode, + const char *name, void **buffer, bool alloc) +LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode, + const char *name, const void *value, size_t size, int flags) +LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, + size_t buffer_size) +LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) +LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) +LSM_HOOK(int, 0, inode_copy_up_xattr, const char *name) +LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, + struct kernfs_node *kn) +LSM_HOOK(int, 0, file_permission, struct file *file, int mask) +LSM_HOOK(int, 0, file_alloc_security, struct file *file) +LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) +LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, + unsigned long arg) +LSM_HOOK(int, 0, mmap_addr, unsigned long addr) +LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) +LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma, + unsigned long reqprot, unsigned long prot) +LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd) +LSM_HOOK(int, 0, file_fcntl, struct file *file, unsigned int cmd, + unsigned long arg) +LSM_HOOK(void, LSM_RET_VOID, file_set_fowner, struct file *file) +LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, + struct fown_struct *fown, int sig) +LSM_HOOK(int, 0, file_receive, struct file *file) +LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, task_alloc, struct task_struct *task, + unsigned long clone_flags) +LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task) +LSM_HOOK(int, 0, cred_alloc_blank, struct cred *cred, gfp_t gfp) +LSM_HOOK(void, LSM_RET_VOID, cred_free, struct cred *cred) +LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old, + gfp_t gfp) +LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new, + const struct cred *old) +LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid) +LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid) +LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode) +LSM_HOOK(int, 0, kernel_module_request, char *kmod_name) +LSM_HOOK(int, 0, kernel_load_data, enum kernel_load_data_id id) +LSM_HOOK(int, 0, kernel_read_file, struct file *file, + enum kernel_read_file_id id) +LSM_HOOK(int, 0, kernel_post_read_file, struct file *file, char *buf, + loff_t size, enum kernel_read_file_id id) +LSM_HOOK(int, 0, task_fix_setuid, struct cred *new, const struct cred *old, + int flags) +LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) +LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) +LSM_HOOK(int, 0, task_getsid, struct task_struct *p) +LSM_HOOK(void, LSM_RET_VOID, task_getsecid, struct task_struct *p, u32 *secid) +LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) +LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio) +LSM_HOOK(int, 0, task_getioprio, struct task_struct *p) +LSM_HOOK(int, 0, task_prlimit, const struct cred *cred, + const struct cred *tcred, unsigned int flags) +LSM_HOOK(int, 0, task_setrlimit, struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim) +LSM_HOOK(int, 0, task_setscheduler, struct task_struct *p) +LSM_HOOK(int, 0, task_getscheduler, struct task_struct *p) +LSM_HOOK(int, 0, task_movememory, struct task_struct *p) +LSM_HOOK(int, 0, task_kill, struct task_struct *p, struct kernel_siginfo *info, + int sig, const struct cred *cred) +LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) +LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, + struct inode *inode) +LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) +LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, + u32 *secid) +LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg) +LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg) +LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm) +LSM_HOOK(void, LSM_RET_VOID, msg_queue_free_security, + struct kern_ipc_perm *perm) +LSM_HOOK(int, 0, msg_queue_associate, struct kern_ipc_perm *perm, int msqflg) +LSM_HOOK(int, 0, msg_queue_msgctl, struct kern_ipc_perm *perm, int cmd) +LSM_HOOK(int, 0, msg_queue_msgsnd, struct kern_ipc_perm *perm, + struct msg_msg *msg, int msqflg) +LSM_HOOK(int, 0, msg_queue_msgrcv, struct kern_ipc_perm *perm, + struct msg_msg *msg, struct task_struct *target, long type, int mode) +LSM_HOOK(int, 0, shm_alloc_security, struct kern_ipc_perm *perm) +LSM_HOOK(void, LSM_RET_VOID, shm_free_security, struct kern_ipc_perm *perm) +LSM_HOOK(int, 0, shm_associate, struct kern_ipc_perm *perm, int shmflg) +LSM_HOOK(int, 0, shm_shmctl, struct kern_ipc_perm *perm, int cmd) +LSM_HOOK(int, 0, shm_shmat, struct kern_ipc_perm *perm, char __user *shmaddr, + int shmflg) +LSM_HOOK(int, 0, sem_alloc_security, struct kern_ipc_perm *perm) +LSM_HOOK(void, LSM_RET_VOID, sem_free_security, struct kern_ipc_perm *perm) +LSM_HOOK(int, 0, sem_associate, struct kern_ipc_perm *perm, int semflg) +LSM_HOOK(int, 0, sem_semctl, struct kern_ipc_perm *perm, int cmd) +LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops, + unsigned nsops, int alter) +LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb) +LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry, + struct inode *inode) +LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, char *name, + char **value) +LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) +LSM_HOOK(int, 0, ismaclabel, const char *name) +LSM_HOOK(int, 0, secid_to_secctx, u32 secid, char **secdata, + u32 *seclen) +LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) +LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) +LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) +LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) +LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, + u32 *ctxlen) + +#ifdef CONFIG_SECURITY_NETWORK +LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other, + struct sock *newsk) +LSM_HOOK(int, 0, unix_may_send, struct socket *sock, struct socket *other) +LSM_HOOK(int, 0, socket_create, int family, int type, int protocol, int kern) +LSM_HOOK(int, 0, socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +LSM_HOOK(int, 0, socket_socketpair, struct socket *socka, struct socket *sockb) +LSM_HOOK(int, 0, socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +LSM_HOOK(int, 0, socket_connect, struct socket *sock, struct sockaddr *address, + int addrlen) +LSM_HOOK(int, 0, socket_listen, struct socket *sock, int backlog) +LSM_HOOK(int, 0, socket_accept, struct socket *sock, struct socket *newsock) +LSM_HOOK(int, 0, socket_sendmsg, struct socket *sock, struct msghdr *msg, + int size) +LSM_HOOK(int, 0, socket_recvmsg, struct socket *sock, struct msghdr *msg, + int size, int flags) +LSM_HOOK(int, 0, socket_getsockname, struct socket *sock) +LSM_HOOK(int, 0, socket_getpeername, struct socket *sock) +LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) +LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) +LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) +LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) +LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, + char __user *optval, int __user *optlen, unsigned len) +LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, + struct sk_buff *skb, u32 *secid) +LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) +LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) +LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk, + struct sock *newsk) +LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, struct sock *sk, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent) +LSM_HOOK(int, 0, inet_conn_request, struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +LSM_HOOK(void, LSM_RET_VOID, inet_csk_clone, struct sock *newsk, + const struct request_sock *req) +LSM_HOOK(void, LSM_RET_VOID, inet_conn_established, struct sock *sk, + struct sk_buff *skb) +LSM_HOOK(int, 0, secmark_relabel_packet, u32 secid) +LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) +LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) +LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, + struct flowi *fl) +LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) +LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) +LSM_HOOK(int, 0, tun_dev_create, void) +LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) +LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) +LSM_HOOK(int, 0, tun_dev_open, void *security) +LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_endpoint *ep, + struct sk_buff *skb) +LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, + struct sockaddr *address, int addrlen) +LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_endpoint *ep, + struct sock *sk, struct sock *newsk) +#endif /* CONFIG_SECURITY_NETWORK */ + +#ifdef CONFIG_SECURITY_INFINIBAND +LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey) +LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name, + u8 port_num) +LSM_HOOK(int, 0, ib_alloc_security, void **sec) +LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec) +#endif /* CONFIG_SECURITY_INFINIBAND */ + +#ifdef CONFIG_SECURITY_NETWORK_XFRM +LSM_HOOK(int, 0, xfrm_policy_alloc_security, struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp) +LSM_HOOK(int, 0, xfrm_policy_clone_security, struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctx) +LSM_HOOK(void, LSM_RET_VOID, xfrm_policy_free_security, + struct xfrm_sec_ctx *ctx) +LSM_HOOK(int, 0, xfrm_policy_delete_security, struct xfrm_sec_ctx *ctx) +LSM_HOOK(int, 0, xfrm_state_alloc, struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +LSM_HOOK(int, 0, xfrm_state_alloc_acquire, struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) +LSM_HOOK(void, LSM_RET_VOID, xfrm_state_free_security, struct xfrm_state *x) +LSM_HOOK(int, 0, xfrm_state_delete_security, struct xfrm_state *x) +LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid, + u8 dir) +LSM_HOOK(int, 1, xfrm_state_pol_flow_match, struct xfrm_state *x, + struct xfrm_policy *xp, const struct flowi *fl) +LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, + int ckall) +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + +/* key management security hooks */ +#ifdef CONFIG_KEYS +LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, + unsigned long flags) +LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) +LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, + unsigned perm) +LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **_buffer) +#endif /* CONFIG_KEYS */ + +#ifdef CONFIG_AUDIT +LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, + void **lsmrule) +LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) +LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) +LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) +#endif /* CONFIG_AUDIT */ + +#ifdef CONFIG_BPF_SYSCALL +LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) +LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) +LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) +LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) +#endif /* CONFIG_BPF_SYSCALL */ + +LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) + +#ifdef CONFIG_PERF_EVENTS +LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) +LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) +LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) +LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) +LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) +#endif /* CONFIG_PERF_EVENTS */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 20d8cf194fb7..c09623b32489 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1456,625 +1456,15 @@ * @what: kernel feature being accessed */ union security_list_options { - int (*binder_set_context_mgr)(struct task_struct *mgr); - int (*binder_transaction)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file)(struct task_struct *from, - struct task_struct *to, - struct file *file); - - int (*ptrace_access_check)(struct task_struct *child, - unsigned int mode); - int (*ptrace_traceme)(struct task_struct *parent); - int (*capget)(struct task_struct *target, kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset)(struct cred *new, const struct cred *old, - const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - int (*capable)(const struct cred *cred, - struct user_namespace *ns, - int cap, - unsigned int opts); - int (*quotactl)(int cmds, int type, int id, struct super_block *sb); - int (*quota_on)(struct dentry *dentry); - int (*syslog)(int type); - int (*settime)(const struct timespec64 *ts, const struct timezone *tz); - int (*vm_enough_memory)(struct mm_struct *mm, long pages); - - int (*bprm_set_creds)(struct linux_binprm *bprm); - int (*bprm_check_security)(struct linux_binprm *bprm); - void (*bprm_committing_creds)(struct linux_binprm *bprm); - void (*bprm_committed_creds)(struct linux_binprm *bprm); - - int (*fs_context_dup)(struct fs_context *fc, struct fs_context *src_sc); - int (*fs_context_parse_param)(struct fs_context *fc, struct fs_parameter *param); - - int (*sb_alloc_security)(struct super_block *sb); - void (*sb_free_security)(struct super_block *sb); - void (*sb_free_mnt_opts)(void *mnt_opts); - int (*sb_eat_lsm_opts)(char *orig, void **mnt_opts); - int (*sb_remount)(struct super_block *sb, void *mnt_opts); - int (*sb_kern_mount)(struct super_block *sb); - int (*sb_show_options)(struct seq_file *m, struct super_block *sb); - int (*sb_statfs)(struct dentry *dentry); - int (*sb_mount)(const char *dev_name, const struct path *path, - const char *type, unsigned long flags, void *data); - int (*sb_umount)(struct vfsmount *mnt, int flags); - int (*sb_pivotroot)(const struct path *old_path, const struct path *new_path); - int (*sb_set_mnt_opts)(struct super_block *sb, - void *mnt_opts, - unsigned long kern_flags, - unsigned long *set_kern_flags); - int (*sb_clone_mnt_opts)(const struct super_block *oldsb, - struct super_block *newsb, - unsigned long kern_flags, - unsigned long *set_kern_flags); - int (*sb_add_mnt_opt)(const char *option, const char *val, int len, - void **mnt_opts); - int (*move_mount)(const struct path *from_path, const struct path *to_path); - int (*dentry_init_security)(struct dentry *dentry, int mode, - const struct qstr *name, void **ctx, - u32 *ctxlen); - int (*dentry_create_files_as)(struct dentry *dentry, int mode, - struct qstr *name, - const struct cred *old, - struct cred *new); - - -#ifdef CONFIG_SECURITY_PATH - int (*path_unlink)(const struct path *dir, struct dentry *dentry); - int (*path_mkdir)(const struct path *dir, struct dentry *dentry, - umode_t mode); - int (*path_rmdir)(const struct path *dir, struct dentry *dentry); - int (*path_mknod)(const struct path *dir, struct dentry *dentry, - umode_t mode, unsigned int dev); - int (*path_truncate)(const struct path *path); - int (*path_symlink)(const struct path *dir, struct dentry *dentry, - const char *old_name); - int (*path_link)(struct dentry *old_dentry, const struct path *new_dir, - struct dentry *new_dentry); - int (*path_rename)(const struct path *old_dir, struct dentry *old_dentry, - const struct path *new_dir, - struct dentry *new_dentry); - int (*path_chmod)(const struct path *path, umode_t mode); - int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); - int (*path_chroot)(const struct path *path); -#endif - /* Needed for inode based security check */ - int (*path_notify)(const struct path *path, u64 mask, - unsigned int obj_type); - int (*inode_alloc_security)(struct inode *inode); - void (*inode_free_security)(struct inode *inode); - int (*inode_init_security)(struct inode *inode, struct inode *dir, - const struct qstr *qstr, - const char **name, void **value, - size_t *len); - int (*inode_create)(struct inode *dir, struct dentry *dentry, - umode_t mode); - int (*inode_link)(struct dentry *old_dentry, struct inode *dir, - struct dentry *new_dentry); - int (*inode_unlink)(struct inode *dir, struct dentry *dentry); - int (*inode_symlink)(struct inode *dir, struct dentry *dentry, - const char *old_name); - int (*inode_mkdir)(struct inode *dir, struct dentry *dentry, - umode_t mode); - int (*inode_rmdir)(struct inode *dir, struct dentry *dentry); - int (*inode_mknod)(struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t dev); - int (*inode_rename)(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry); - int (*inode_readlink)(struct dentry *dentry); - int (*inode_follow_link)(struct dentry *dentry, struct inode *inode, - bool rcu); - int (*inode_permission)(struct inode *inode, int mask); - int (*inode_setattr)(struct dentry *dentry, struct iattr *attr); - int (*inode_getattr)(const struct path *path); - int (*inode_setxattr)(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); - void (*inode_post_setxattr)(struct dentry *dentry, const char *name, - const void *value, size_t size, - int flags); - int (*inode_getxattr)(struct dentry *dentry, const char *name); - int (*inode_listxattr)(struct dentry *dentry); - int (*inode_removexattr)(struct dentry *dentry, const char *name); - int (*inode_need_killpriv)(struct dentry *dentry); - int (*inode_killpriv)(struct dentry *dentry); - int (*inode_getsecurity)(struct inode *inode, const char *name, - void **buffer, bool alloc); - int (*inode_setsecurity)(struct inode *inode, const char *name, - const void *value, size_t size, - int flags); - int (*inode_listsecurity)(struct inode *inode, char *buffer, - size_t buffer_size); - void (*inode_getsecid)(struct inode *inode, u32 *secid); - int (*inode_copy_up)(struct dentry *src, struct cred **new); - int (*inode_copy_up_xattr)(const char *name); - - int (*kernfs_init_security)(struct kernfs_node *kn_dir, - struct kernfs_node *kn); - - int (*file_permission)(struct file *file, int mask); - int (*file_alloc_security)(struct file *file); - void (*file_free_security)(struct file *file); - int (*file_ioctl)(struct file *file, unsigned int cmd, - unsigned long arg); - int (*mmap_addr)(unsigned long addr); - int (*mmap_file)(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); - int (*file_mprotect)(struct vm_area_struct *vma, unsigned long reqprot, - unsigned long prot); - int (*file_lock)(struct file *file, unsigned int cmd); - int (*file_fcntl)(struct file *file, unsigned int cmd, - unsigned long arg); - void (*file_set_fowner)(struct file *file); - int (*file_send_sigiotask)(struct task_struct *tsk, - struct fown_struct *fown, int sig); - int (*file_receive)(struct file *file); - int (*file_open)(struct file *file); - - int (*task_alloc)(struct task_struct *task, unsigned long clone_flags); - void (*task_free)(struct task_struct *task); - int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp); - void (*cred_free)(struct cred *cred); - int (*cred_prepare)(struct cred *new, const struct cred *old, - gfp_t gfp); - void (*cred_transfer)(struct cred *new, const struct cred *old); - void (*cred_getsecid)(const struct cred *c, u32 *secid); - int (*kernel_act_as)(struct cred *new, u32 secid); - int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_module_request)(char *kmod_name); - int (*kernel_load_data)(enum kernel_load_data_id id); - int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id); - int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size, - enum kernel_read_file_id id); - int (*task_fix_setuid)(struct cred *new, const struct cred *old, - int flags); - int (*task_setpgid)(struct task_struct *p, pid_t pgid); - int (*task_getpgid)(struct task_struct *p); - int (*task_getsid)(struct task_struct *p); - void (*task_getsecid)(struct task_struct *p, u32 *secid); - int (*task_setnice)(struct task_struct *p, int nice); - int (*task_setioprio)(struct task_struct *p, int ioprio); - int (*task_getioprio)(struct task_struct *p); - int (*task_prlimit)(const struct cred *cred, const struct cred *tcred, - unsigned int flags); - int (*task_setrlimit)(struct task_struct *p, unsigned int resource, - struct rlimit *new_rlim); - int (*task_setscheduler)(struct task_struct *p); - int (*task_getscheduler)(struct task_struct *p); - int (*task_movememory)(struct task_struct *p); - int (*task_kill)(struct task_struct *p, struct kernel_siginfo *info, - int sig, const struct cred *cred); - int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); - void (*task_to_inode)(struct task_struct *p, struct inode *inode); - - int (*ipc_permission)(struct kern_ipc_perm *ipcp, short flag); - void (*ipc_getsecid)(struct kern_ipc_perm *ipcp, u32 *secid); - - int (*msg_msg_alloc_security)(struct msg_msg *msg); - void (*msg_msg_free_security)(struct msg_msg *msg); - - int (*msg_queue_alloc_security)(struct kern_ipc_perm *perm); - void (*msg_queue_free_security)(struct kern_ipc_perm *perm); - int (*msg_queue_associate)(struct kern_ipc_perm *perm, int msqflg); - int (*msg_queue_msgctl)(struct kern_ipc_perm *perm, int cmd); - int (*msg_queue_msgsnd)(struct kern_ipc_perm *perm, struct msg_msg *msg, - int msqflg); - int (*msg_queue_msgrcv)(struct kern_ipc_perm *perm, struct msg_msg *msg, - struct task_struct *target, long type, - int mode); - - int (*shm_alloc_security)(struct kern_ipc_perm *perm); - void (*shm_free_security)(struct kern_ipc_perm *perm); - int (*shm_associate)(struct kern_ipc_perm *perm, int shmflg); - int (*shm_shmctl)(struct kern_ipc_perm *perm, int cmd); - int (*shm_shmat)(struct kern_ipc_perm *perm, char __user *shmaddr, - int shmflg); - - int (*sem_alloc_security)(struct kern_ipc_perm *perm); - void (*sem_free_security)(struct kern_ipc_perm *perm); - int (*sem_associate)(struct kern_ipc_perm *perm, int semflg); - int (*sem_semctl)(struct kern_ipc_perm *perm, int cmd); - int (*sem_semop)(struct kern_ipc_perm *perm, struct sembuf *sops, - unsigned nsops, int alter); - - int (*netlink_send)(struct sock *sk, struct sk_buff *skb); - - void (*d_instantiate)(struct dentry *dentry, struct inode *inode); - - int (*getprocattr)(struct task_struct *p, char *name, char **value); - int (*setprocattr)(const char *name, void *value, size_t size); - int (*ismaclabel)(const char *name); - int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen); - int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid); - void (*release_secctx)(char *secdata, u32 seclen); - - void (*inode_invalidate_secctx)(struct inode *inode); - int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); - int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); - int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); - -#ifdef CONFIG_SECURITY_NETWORK - int (*unix_stream_connect)(struct sock *sock, struct sock *other, - struct sock *newsk); - int (*unix_may_send)(struct socket *sock, struct socket *other); - - int (*socket_create)(int family, int type, int protocol, int kern); - int (*socket_post_create)(struct socket *sock, int family, int type, - int protocol, int kern); - int (*socket_socketpair)(struct socket *socka, struct socket *sockb); - int (*socket_bind)(struct socket *sock, struct sockaddr *address, - int addrlen); - int (*socket_connect)(struct socket *sock, struct sockaddr *address, - int addrlen); - int (*socket_listen)(struct socket *sock, int backlog); - int (*socket_accept)(struct socket *sock, struct socket *newsock); - int (*socket_sendmsg)(struct socket *sock, struct msghdr *msg, - int size); - int (*socket_recvmsg)(struct socket *sock, struct msghdr *msg, - int size, int flags); - int (*socket_getsockname)(struct socket *sock); - int (*socket_getpeername)(struct socket *sock); - int (*socket_getsockopt)(struct socket *sock, int level, int optname); - int (*socket_setsockopt)(struct socket *sock, int level, int optname); - int (*socket_shutdown)(struct socket *sock, int how); - int (*socket_sock_rcv_skb)(struct sock *sk, struct sk_buff *skb); - int (*socket_getpeersec_stream)(struct socket *sock, - char __user *optval, - int __user *optlen, unsigned len); - int (*socket_getpeersec_dgram)(struct socket *sock, - struct sk_buff *skb, u32 *secid); - int (*sk_alloc_security)(struct sock *sk, int family, gfp_t priority); - void (*sk_free_security)(struct sock *sk); - void (*sk_clone_security)(const struct sock *sk, struct sock *newsk); - void (*sk_getsecid)(struct sock *sk, u32 *secid); - void (*sock_graft)(struct sock *sk, struct socket *parent); - int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, - struct request_sock *req); - void (*inet_csk_clone)(struct sock *newsk, - const struct request_sock *req); - void (*inet_conn_established)(struct sock *sk, struct sk_buff *skb); - int (*secmark_relabel_packet)(u32 secid); - void (*secmark_refcount_inc)(void); - void (*secmark_refcount_dec)(void); - void (*req_classify_flow)(const struct request_sock *req, - struct flowi *fl); - int (*tun_dev_alloc_security)(void **security); - void (*tun_dev_free_security)(void *security); - int (*tun_dev_create)(void); - int (*tun_dev_attach_queue)(void *security); - int (*tun_dev_attach)(struct sock *sk, void *security); - int (*tun_dev_open)(void *security); - int (*sctp_assoc_request)(struct sctp_endpoint *ep, - struct sk_buff *skb); - int (*sctp_bind_connect)(struct sock *sk, int optname, - struct sockaddr *address, int addrlen); - void (*sctp_sk_clone)(struct sctp_endpoint *ep, struct sock *sk, - struct sock *newsk); -#endif /* CONFIG_SECURITY_NETWORK */ - -#ifdef CONFIG_SECURITY_INFINIBAND - int (*ib_pkey_access)(void *sec, u64 subnet_prefix, u16 pkey); - int (*ib_endport_manage_subnet)(void *sec, const char *dev_name, - u8 port_num); - int (*ib_alloc_security)(void **sec); - void (*ib_free_security)(void *sec); -#endif /* CONFIG_SECURITY_INFINIBAND */ - -#ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security)(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx, - gfp_t gfp); - int (*xfrm_policy_clone_security)(struct xfrm_sec_ctx *old_ctx, - struct xfrm_sec_ctx **new_ctx); - void (*xfrm_policy_free_security)(struct xfrm_sec_ctx *ctx); - int (*xfrm_policy_delete_security)(struct xfrm_sec_ctx *ctx); - int (*xfrm_state_alloc)(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx); - int (*xfrm_state_alloc_acquire)(struct xfrm_state *x, - struct xfrm_sec_ctx *polsec, - u32 secid); - void (*xfrm_state_free_security)(struct xfrm_state *x); - int (*xfrm_state_delete_security)(struct xfrm_state *x); - int (*xfrm_policy_lookup)(struct xfrm_sec_ctx *ctx, u32 fl_secid, - u8 dir); - int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, - struct xfrm_policy *xp, - const struct flowi *fl); - int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ - - /* key management security hooks */ -#ifdef CONFIG_KEYS - int (*key_alloc)(struct key *key, const struct cred *cred, - unsigned long flags); - void (*key_free)(struct key *key); - int (*key_permission)(key_ref_t key_ref, const struct cred *cred, - unsigned perm); - int (*key_getsecurity)(struct key *key, char **_buffer); -#endif /* CONFIG_KEYS */ - -#ifdef CONFIG_AUDIT - int (*audit_rule_init)(u32 field, u32 op, char *rulestr, - void **lsmrule); - int (*audit_rule_known)(struct audit_krule *krule); - int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule); - void (*audit_rule_free)(void *lsmrule); -#endif /* CONFIG_AUDIT */ - -#ifdef CONFIG_BPF_SYSCALL - int (*bpf)(int cmd, union bpf_attr *attr, - unsigned int size); - int (*bpf_map)(struct bpf_map *map, fmode_t fmode); - int (*bpf_prog)(struct bpf_prog *prog); - int (*bpf_map_alloc_security)(struct bpf_map *map); - void (*bpf_map_free_security)(struct bpf_map *map); - int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); - void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); -#endif /* CONFIG_BPF_SYSCALL */ - int (*locked_down)(enum lockdown_reason what); -#ifdef CONFIG_PERF_EVENTS - int (*perf_event_open)(struct perf_event_attr *attr, int type); - int (*perf_event_alloc)(struct perf_event *event); - void (*perf_event_free)(struct perf_event *event); - int (*perf_event_read)(struct perf_event *event); - int (*perf_event_write)(struct perf_event *event); - -#endif + #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); + #include "lsm_hook_defs.h" + #undef LSM_HOOK }; struct security_hook_heads { - struct hlist_head binder_set_context_mgr; - struct hlist_head binder_transaction; - struct hlist_head binder_transfer_binder; - struct hlist_head binder_transfer_file; - struct hlist_head ptrace_access_check; - struct hlist_head ptrace_traceme; - struct hlist_head capget; - struct hlist_head capset; - struct hlist_head capable; - struct hlist_head quotactl; - struct hlist_head quota_on; - struct hlist_head syslog; - struct hlist_head settime; - struct hlist_head vm_enough_memory; - struct hlist_head bprm_set_creds; - struct hlist_head bprm_check_security; - struct hlist_head bprm_committing_creds; - struct hlist_head bprm_committed_creds; - struct hlist_head fs_context_dup; - struct hlist_head fs_context_parse_param; - struct hlist_head sb_alloc_security; - struct hlist_head sb_free_security; - struct hlist_head sb_free_mnt_opts; - struct hlist_head sb_eat_lsm_opts; - struct hlist_head sb_remount; - struct hlist_head sb_kern_mount; - struct hlist_head sb_show_options; - struct hlist_head sb_statfs; - struct hlist_head sb_mount; - struct hlist_head sb_umount; - struct hlist_head sb_pivotroot; - struct hlist_head sb_set_mnt_opts; - struct hlist_head sb_clone_mnt_opts; - struct hlist_head sb_add_mnt_opt; - struct hlist_head move_mount; - struct hlist_head dentry_init_security; - struct hlist_head dentry_create_files_as; -#ifdef CONFIG_SECURITY_PATH - struct hlist_head path_unlink; - struct hlist_head path_mkdir; - struct hlist_head path_rmdir; - struct hlist_head path_mknod; - struct hlist_head path_truncate; - struct hlist_head path_symlink; - struct hlist_head path_link; - struct hlist_head path_rename; - struct hlist_head path_chmod; - struct hlist_head path_chown; - struct hlist_head path_chroot; -#endif - /* Needed for inode based modules as well */ - struct hlist_head path_notify; - struct hlist_head inode_alloc_security; - struct hlist_head inode_free_security; - struct hlist_head inode_init_security; - struct hlist_head inode_create; - struct hlist_head inode_link; - struct hlist_head inode_unlink; - struct hlist_head inode_symlink; - struct hlist_head inode_mkdir; - struct hlist_head inode_rmdir; - struct hlist_head inode_mknod; - struct hlist_head inode_rename; - struct hlist_head inode_readlink; - struct hlist_head inode_follow_link; - struct hlist_head inode_permission; - struct hlist_head inode_setattr; - struct hlist_head inode_getattr; - struct hlist_head inode_setxattr; - struct hlist_head inode_post_setxattr; - struct hlist_head inode_getxattr; - struct hlist_head inode_listxattr; - struct hlist_head inode_removexattr; - struct hlist_head inode_need_killpriv; - struct hlist_head inode_killpriv; - struct hlist_head inode_getsecurity; - struct hlist_head inode_setsecurity; - struct hlist_head inode_listsecurity; - struct hlist_head inode_getsecid; - struct hlist_head inode_copy_up; - struct hlist_head inode_copy_up_xattr; - struct hlist_head kernfs_init_security; - struct hlist_head file_permission; - struct hlist_head file_alloc_security; - struct hlist_head file_free_security; - struct hlist_head file_ioctl; - struct hlist_head mmap_addr; - struct hlist_head mmap_file; - struct hlist_head file_mprotect; - struct hlist_head file_lock; - struct hlist_head file_fcntl; - struct hlist_head file_set_fowner; - struct hlist_head file_send_sigiotask; - struct hlist_head file_receive; - struct hlist_head file_open; - struct hlist_head task_alloc; - struct hlist_head task_free; - struct hlist_head cred_alloc_blank; - struct hlist_head cred_free; - struct hlist_head cred_prepare; - struct hlist_head cred_transfer; - struct hlist_head cred_getsecid; - struct hlist_head kernel_act_as; - struct hlist_head kernel_create_files_as; - struct hlist_head kernel_load_data; - struct hlist_head kernel_read_file; - struct hlist_head kernel_post_read_file; - struct hlist_head kernel_module_request; - struct hlist_head task_fix_setuid; - struct hlist_head task_setpgid; - struct hlist_head task_getpgid; - struct hlist_head task_getsid; - struct hlist_head task_getsecid; - struct hlist_head task_setnice; - struct hlist_head task_setioprio; - struct hlist_head task_getioprio; - struct hlist_head task_prlimit; - struct hlist_head task_setrlimit; - struct hlist_head task_setscheduler; - struct hlist_head task_getscheduler; - struct hlist_head task_movememory; - struct hlist_head task_kill; - struct hlist_head task_prctl; - struct hlist_head task_to_inode; - struct hlist_head ipc_permission; - struct hlist_head ipc_getsecid; - struct hlist_head msg_msg_alloc_security; - struct hlist_head msg_msg_free_security; - struct hlist_head msg_queue_alloc_security; - struct hlist_head msg_queue_free_security; - struct hlist_head msg_queue_associate; - struct hlist_head msg_queue_msgctl; - struct hlist_head msg_queue_msgsnd; - struct hlist_head msg_queue_msgrcv; - struct hlist_head shm_alloc_security; - struct hlist_head shm_free_security; - struct hlist_head shm_associate; - struct hlist_head shm_shmctl; - struct hlist_head shm_shmat; - struct hlist_head sem_alloc_security; - struct hlist_head sem_free_security; - struct hlist_head sem_associate; - struct hlist_head sem_semctl; - struct hlist_head sem_semop; - struct hlist_head netlink_send; - struct hlist_head d_instantiate; - struct hlist_head getprocattr; - struct hlist_head setprocattr; - struct hlist_head ismaclabel; - struct hlist_head secid_to_secctx; - struct hlist_head secctx_to_secid; - struct hlist_head release_secctx; - struct hlist_head inode_invalidate_secctx; - struct hlist_head inode_notifysecctx; - struct hlist_head inode_setsecctx; - struct hlist_head inode_getsecctx; -#ifdef CONFIG_SECURITY_NETWORK - struct hlist_head unix_stream_connect; - struct hlist_head unix_may_send; - struct hlist_head socket_create; - struct hlist_head socket_post_create; - struct hlist_head socket_socketpair; - struct hlist_head socket_bind; - struct hlist_head socket_connect; - struct hlist_head socket_listen; - struct hlist_head socket_accept; - struct hlist_head socket_sendmsg; - struct hlist_head socket_recvmsg; - struct hlist_head socket_getsockname; - struct hlist_head socket_getpeername; - struct hlist_head socket_getsockopt; - struct hlist_head socket_setsockopt; - struct hlist_head socket_shutdown; - struct hlist_head socket_sock_rcv_skb; - struct hlist_head socket_getpeersec_stream; - struct hlist_head socket_getpeersec_dgram; - struct hlist_head sk_alloc_security; - struct hlist_head sk_free_security; - struct hlist_head sk_clone_security; - struct hlist_head sk_getsecid; - struct hlist_head sock_graft; - struct hlist_head inet_conn_request; - struct hlist_head inet_csk_clone; - struct hlist_head inet_conn_established; - struct hlist_head secmark_relabel_packet; - struct hlist_head secmark_refcount_inc; - struct hlist_head secmark_refcount_dec; - struct hlist_head req_classify_flow; - struct hlist_head tun_dev_alloc_security; - struct hlist_head tun_dev_free_security; - struct hlist_head tun_dev_create; - struct hlist_head tun_dev_attach_queue; - struct hlist_head tun_dev_attach; - struct hlist_head tun_dev_open; - struct hlist_head sctp_assoc_request; - struct hlist_head sctp_bind_connect; - struct hlist_head sctp_sk_clone; -#endif /* CONFIG_SECURITY_NETWORK */ -#ifdef CONFIG_SECURITY_INFINIBAND - struct hlist_head ib_pkey_access; - struct hlist_head ib_endport_manage_subnet; - struct hlist_head ib_alloc_security; - struct hlist_head ib_free_security; -#endif /* CONFIG_SECURITY_INFINIBAND */ -#ifdef CONFIG_SECURITY_NETWORK_XFRM - struct hlist_head xfrm_policy_alloc_security; - struct hlist_head xfrm_policy_clone_security; - struct hlist_head xfrm_policy_free_security; - struct hlist_head xfrm_policy_delete_security; - struct hlist_head xfrm_state_alloc; - struct hlist_head xfrm_state_alloc_acquire; - struct hlist_head xfrm_state_free_security; - struct hlist_head xfrm_state_delete_security; - struct hlist_head xfrm_policy_lookup; - struct hlist_head xfrm_state_pol_flow_match; - struct hlist_head xfrm_decode_session; -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ -#ifdef CONFIG_KEYS - struct hlist_head key_alloc; - struct hlist_head key_free; - struct hlist_head key_permission; - struct hlist_head key_getsecurity; -#endif /* CONFIG_KEYS */ -#ifdef CONFIG_AUDIT - struct hlist_head audit_rule_init; - struct hlist_head audit_rule_known; - struct hlist_head audit_rule_match; - struct hlist_head audit_rule_free; -#endif /* CONFIG_AUDIT */ -#ifdef CONFIG_BPF_SYSCALL - struct hlist_head bpf; - struct hlist_head bpf_map; - struct hlist_head bpf_prog; - struct hlist_head bpf_map_alloc_security; - struct hlist_head bpf_map_free_security; - struct hlist_head bpf_prog_alloc_security; - struct hlist_head bpf_prog_free_security; -#endif /* CONFIG_BPF_SYSCALL */ - struct hlist_head locked_down; -#ifdef CONFIG_PERF_EVENTS - struct hlist_head perf_event_open; - struct hlist_head perf_event_alloc; - struct hlist_head perf_event_free; - struct hlist_head perf_event_read; - struct hlist_head perf_event_write; -#endif + #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; + #include "lsm_hook_defs.h" + #undef LSM_HOOK } __randomize_layout; /* @@ -2100,6 +1490,12 @@ struct lsm_blob_sizes { int lbs_task; }; +/* + * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void + * LSM hooks (in include/linux/lsm_hook_defs.h). + */ +#define LSM_RET_VOID ((void) 0) + /* * Initializing a security_hook_list structure takes * up a lot of space in a source file. This macro takes -- cgit v1.2.3 From 9d3fdea789c8fab51381c2d609932fabe94c0517 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 29 Mar 2020 01:43:51 +0100 Subject: bpf: lsm: Provide attachment points for BPF LSM programs When CONFIG_BPF_LSM is enabled, nop functions, bpf_lsm_, are generated for each LSM hook. These functions are initialized as LSM hooks in a subsequent patch. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Reviewed-by: Florent Revest Reviewed-by: Kees Cook Acked-by: Yonghong Song Acked-by: James Morris Link: https://lore.kernel.org/bpf/20200329004356.27286-4-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/bpf_lsm.h (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h new file mode 100644 index 000000000000..83b96895829f --- /dev/null +++ b/include/linux/bpf_lsm.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2020 Google LLC. + */ + +#ifndef _LINUX_BPF_LSM_H +#define _LINUX_BPF_LSM_H + +#include +#include + +#ifdef CONFIG_BPF_LSM + +#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + RET bpf_lsm_##NAME(__VA_ARGS__); +#include +#undef LSM_HOOK + +#endif /* CONFIG_BPF_LSM */ + +#endif /* _LINUX_BPF_LSM_H */ -- cgit v1.2.3 From 9e4e01dfd3254c7f04f24b7c6b29596bc12332f3 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 29 Mar 2020 01:43:52 +0100 Subject: bpf: lsm: Implement attach, detach and execution JITed BPF programs are dynamically attached to the LSM hooks using BPF trampolines. The trampoline prologue generates code to handle conversion of the signature of the hook to the appropriate BPF context. The allocated trampoline programs are attached to the nop functions initialized as LSM hooks. BPF_PROG_TYPE_LSM programs must have a GPL compatible license and and need CAP_SYS_ADMIN (required for loading eBPF programs). Upon attachment: * A BPF fexit trampoline is used for LSM hooks with a void return type. * A BPF fmod_ret trampoline is used for LSM hooks which return an int. The attached programs can override the return value of the bpf LSM hook to indicate a MAC Policy decision. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Reviewed-by: Florent Revest Acked-by: Andrii Nakryiko Acked-by: James Morris Link: https://lore.kernel.org/bpf/20200329004356.27286-5-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 83b96895829f..af74712af585 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -17,6 +17,17 @@ #include #undef LSM_HOOK +int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, + const struct bpf_prog *prog); + +#else /* !CONFIG_BPF_LSM */ + +static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, + const struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ -- cgit v1.2.3 From a08e7fd9123d85dfdf8d1dc61dbe321c8359d25f Mon Sep 17 00:00:00 2001 From: Cambda Zhu Date: Thu, 26 Mar 2020 15:33:14 +0800 Subject: net: Fix typo of SKB_SGO_CB_OFFSET The SKB_SGO_CB_OFFSET should be SKB_GSO_CB_OFFSET which means the offset of the GSO in skb cb. This patch fixes the typo. Fixes: 9207f9d45b0a ("net: preserve IP control block during GSO segmentation") Signed-off-by: Cambda Zhu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e47895082b4b..28b1a2b4459e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4389,8 +4389,8 @@ struct skb_gso_cb { __wsum csum; __u16 csum_start; }; -#define SKB_SGO_CB_OFFSET 32 -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET)) +#define SKB_GSO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) { -- cgit v1.2.3 From 3df523ab582c52f745f9a73b9ebf9368ede555ac Mon Sep 17 00:00:00 2001 From: Peter Krystad Date: Fri, 27 Mar 2020 14:48:37 -0700 Subject: mptcp: Add ADD_ADDR handling Add handling for sending and receiving the ADD_ADDR, ADD_ADDR6, and RM_ADDR suboptions. Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Peter Krystad Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/linux/tcp.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3dc964010fef..1225db308957 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -86,9 +86,13 @@ struct mptcp_options_received { u64 data_seq; u32 subflow_seq; u16 data_len; - u8 mp_capable : 1, + u16 mp_capable : 1, mp_join : 1, - dss : 1; + dss : 1, + add_addr : 1, + rm_addr : 1, + family : 4, + echo : 1; u8 use_map:1, dsn64:1, data_fin:1, @@ -96,6 +100,16 @@ struct mptcp_options_received { ack64:1, mpc_map:1, __unused:2; + u8 addr_id; + u8 rm_id; + union { + struct in_addr addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + struct in6_addr addr6; +#endif + }; + u64 ahmac; + u16 port; }; #endif @@ -131,6 +145,8 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) #if IS_ENABLED(CONFIG_MPTCP) rx_opt->mptcp.mp_capable = 0; rx_opt->mptcp.mp_join = 0; + rx_opt->mptcp.add_addr = 0; + rx_opt->mptcp.rm_addr = 0; rx_opt->mptcp.dss = 0; #endif } -- cgit v1.2.3 From f296234c98a8fcec94eec80304a873f635d350ea Mon Sep 17 00:00:00 2001 From: Peter Krystad Date: Fri, 27 Mar 2020 14:48:39 -0700 Subject: mptcp: Add handling of incoming MP_JOIN requests Process the MP_JOIN option in a SYN packet with the same flow as MP_CAPABLE but when the third ACK is received add the subflow to the MPTCP socket subflow list instead of adding it to the TCP socket accept queue. The subflow is added at the end of the subflow list so it will not interfere with the existing subflows operation and no data is expected to be transmitted on it. Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Peter Krystad Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1225db308957..421c99c12291 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -92,7 +92,13 @@ struct mptcp_options_received { add_addr : 1, rm_addr : 1, family : 4, - echo : 1; + echo : 1, + backup : 1; + u32 token; + u32 nonce; + u64 thmac; + u8 hmac[20]; + u8 join_id; u8 use_map:1, dsn64:1, data_fin:1, -- cgit v1.2.3 From 8610c7c6e3bd647ff98d21c8bc0580e77bc2f8b3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 27 Mar 2020 18:00:20 -0400 Subject: net: ipv6: add support for rpl sr exthdr This patch adds rpl source routing receive handling. Everything works only if sysconf "rpl_seg_enabled" and source routing is enabled. Mostly the same behaviour as IPv6 segmentation routing. To handle compression and uncompression a rpl.c file is created which contains the necessary functionality. The receive handling will also care about IPv6 encapsulated so far it's specified as possible nexthdr in RFC 6554. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ea7c7906591e..2cb445a8fc9e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -74,6 +74,7 @@ struct ipv6_devconf { __u32 addr_gen_mode; __s32 disable_policy; __s32 ndisc_tclass; + __s32 rpl_seg_enabled; struct ctl_table_header *sysctl_header; }; -- cgit v1.2.3 From 62582a7ee78364c6106d09d5e0f1dc7f564be887 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 29 Mar 2020 07:55:10 -0700 Subject: ptp: Avoid deadlocks in the programmable pin code. The PTP Hardware Clock (PHC) subsystem offers an API for configuring programmable pins. User space sets or gets the settings using ioctls, and drivers verify dialed settings via a callback. Drivers may also query pin settings by calling the ptp_find_pin() method. Although the core subsystem protects concurrent access to the pin settings, the implementation places illogical restrictions on how drivers may call ptp_find_pin(). When enabling an auxiliary function via the .enable(on=1) callback, drivers may invoke the pin finding method, but when disabling with .enable(on=0) drivers are not permitted to do so. With the exception of the mv88e6xxx, all of the PHC drivers do respect this restriction, but still the locking pattern is both confusing and unnecessary. This patch changes the locking implementation to allow PHC drivers to freely call ptp_find_pin() from their .enable() and .verify() callbacks. V2 ChangeLog: - fixed spelling in the kernel doc - add Vladimir's tested by tag Signed-off-by: Richard Cochran Reported-by: Yangbo Lu Tested-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index c64a1ef87240..121a7eda4593 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -223,6 +223,12 @@ extern s32 scaled_ppm_to_ppb(long ppm); /** * ptp_find_pin() - obtain the pin index of a given auxiliary function * + * The caller must hold ptp_clock::pincfg_mux. Drivers do not have + * access to that mutex as ptp_clock is an opaque type. However, the + * core code acquires the mutex before invoking the driver's + * ptp_clock_info::enable() callback, and so drivers may call this + * function from that context. + * * @ptp: The clock obtained from ptp_clock_register(). * @func: One of the ptp_pin_function enumerated values. * @chan: The particular functional channel to find. @@ -233,6 +239,19 @@ extern s32 scaled_ppm_to_ppb(long ppm); int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan); +/** + * ptp_find_pin_unlocked() - wrapper for ptp_find_pin() + * + * This function acquires the ptp_clock::pincfg_mux mutex before + * invoking ptp_find_pin(). Instead of using this function, drivers + * should most likely call ptp_find_pin() directly from their + * ptp_clock_info::enable() method. + * + */ + +int ptp_find_pin_unlocked(struct ptp_clock *ptp, + enum ptp_pin_function func, unsigned int chan); + /** * ptp_schedule_worker() - schedule ptp auxiliary work * -- cgit v1.2.3 From 8063f761cd7c17fc1d0018728936e0c33a25388a Mon Sep 17 00:00:00 2001 From: Yuval Basson Date: Sun, 29 Mar 2020 20:32:49 +0300 Subject: qed: Fix use after free in qed_chain_free The qed_chain data structure was modified in commit 1a4a69751f4d ("qed: Chain support for external PBL") to support receiving an external pbl (due to iWARP FW requirements). The pages pointed to by the pbl are allocated in qed_chain_alloc and their virtual address are stored in an virtual addresses array to enable accessing and freeing the data. The physical addresses however weren't stored and were accessed directly from the external-pbl during free. Destroy-qp flow, leads to freeing the external pbl before the chain is freed, when the chain is freed it tries accessing the already freed external pbl, leading to a use-after-free. Therefore we need to store the physical addresses in additional to the virtual addresses in a new data structure. Fixes: 1a4a69751f4d ("qed: Chain support for external PBL") Signed-off-by: Michal Kalderon Signed-off-by: Yuval Bason Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 2dd0a9ed5b36..733fad7dfbed 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -97,6 +97,11 @@ struct qed_chain_u32 { u32 cons_idx; }; +struct addr_tbl_entry { + void *virt_addr; + dma_addr_t dma_map; +}; + struct qed_chain { /* fastpath portion of the chain - required for commands such * as produce / consume. @@ -107,10 +112,11 @@ struct qed_chain { /* Fastpath portions of the PBL [if exists] */ struct { - /* Table for keeping the virtual addresses of the chain pages, - * respectively to the physical addresses in the pbl table. + /* Table for keeping the virtual and physical addresses of the + * chain pages, respectively to the physical addresses + * in the pbl table. */ - void **pp_virt_addr_tbl; + struct addr_tbl_entry *pp_addr_tbl; union { struct qed_chain_pbl_u16 u16; @@ -287,7 +293,7 @@ qed_chain_advance_page(struct qed_chain *p_chain, *(u32 *)page_to_inc = 0; page_index = *(u32 *)page_to_inc; } - *p_next_elem = p_chain->pbl.pp_virt_addr_tbl[page_index]; + *p_next_elem = p_chain->pbl.pp_addr_tbl[page_index].virt_addr; } } @@ -537,7 +543,7 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->pbl_sp.p_phys_table = 0; p_chain->pbl_sp.p_virt_table = NULL; - p_chain->pbl.pp_virt_addr_tbl = NULL; + p_chain->pbl.pp_addr_tbl = NULL; } /** @@ -575,11 +581,11 @@ static inline void qed_chain_init_mem(struct qed_chain *p_chain, static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, void *p_virt_pbl, dma_addr_t p_phys_pbl, - void **pp_virt_addr_tbl) + struct addr_tbl_entry *pp_addr_tbl) { p_chain->pbl_sp.p_phys_table = p_phys_pbl; p_chain->pbl_sp.p_virt_table = p_virt_pbl; - p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl; + p_chain->pbl.pp_addr_tbl = pp_addr_tbl; } /** @@ -644,7 +650,7 @@ static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain) break; case QED_CHAIN_MODE_PBL: last_page_idx = p_chain->page_cnt - 1; - p_virt_addr = p_chain->pbl.pp_virt_addr_tbl[last_page_idx]; + p_virt_addr = p_chain->pbl.pp_addr_tbl[last_page_idx].virt_addr; break; } /* p_virt_addr points at this stage to the last page of the chain */ @@ -716,7 +722,7 @@ static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain) page_cnt = qed_chain_get_page_cnt(p_chain); for (i = 0; i < page_cnt; i++) - memset(p_chain->pbl.pp_virt_addr_tbl[i], 0, + memset(p_chain->pbl.pp_addr_tbl[i].virt_addr, 0, QED_CHAIN_PAGE_SIZE); } -- cgit v1.2.3 From 0bd274060a0f49d974b7e88fa87b6e1c1f496a48 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 30 Mar 2020 18:44:44 +0100 Subject: net: phylink: change phylink_mii_c22_pcs_set_advertisement() prototype Change phylink_mii_c22_pcs_set_advertisement() to take only the PHY interface and advertisement mask, rather than the full phylink state. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 8fa6df3b881b..6f6ecf3e0be1 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -320,7 +320,8 @@ void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, struct phylink_link_state *state); int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, - const struct phylink_link_state *state); + phy_interface_t interface, + const unsigned long *advertising); void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, -- cgit v1.2.3 From e7765d634aaa9dd5db3cb59155269ef6c18d4592 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 30 Mar 2020 18:44:50 +0100 Subject: net: phylink: rename 'ops' to 'mac_ops' Rename the bland 'ops' member of struct phylink to be a more descriptive 'mac_ops' - this is necessary as we're about to introduce another set of operations. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 6f6ecf3e0be1..90c907eaae15 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -272,7 +272,7 @@ void mac_link_up(struct phylink_config *config, struct phy_device *phy, struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, - const struct phylink_mac_ops *ops); + const struct phylink_mac_ops *mac_ops); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); -- cgit v1.2.3 From 4c0d6d3a7a81fcd2dcb4abf15fe2e13074cf8619 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 30 Mar 2020 18:44:55 +0100 Subject: net: phylink: add separate pcs operations structure Add a separate set of PCS operations, which MAC drivers can use to couple phylink with their associated MAC PCS layer. The PCS operations include: - pcs_get_state() - reads the link up/down, resolved speed, duplex and pause from the PCS. - pcs_config() - configures the PCS for the specified mode, PHY interface type, and setting the advertisement. - pcs_an_restart() - restarts 802.3 in-band negotiation with the link partner - pcs_link_up() - informs the PCS that link has come up, and the parameters of the link. Link parameters are used to program the PCS for fixed speed and non-inband modes. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 90c907eaae15..3f8d37ec5503 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -270,9 +270,97 @@ void mac_link_up(struct phylink_config *config, struct phy_device *phy, int speed, int duplex, bool tx_pause, bool rx_pause); #endif +/** + * struct phylink_pcs_ops - MAC PCS operations structure. + * @pcs_get_state: read the current MAC PCS link state from the hardware. + * @pcs_config: configure the MAC PCS for the selected mode and state. + * @pcs_an_restart: restart 802.3z BaseX autonegotiation. + * @pcs_link_up: program the PCS for the resolved link configuration + * (where necessary). + */ +struct phylink_pcs_ops { + void (*pcs_get_state)(struct phylink_config *config, + struct phylink_link_state *state); + int (*pcs_config)(struct phylink_config *config, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising); + void (*pcs_an_restart)(struct phylink_config *config); + void (*pcs_link_up)(struct phylink_config *config, unsigned int mode, + phy_interface_t interface, int speed, int duplex); +}; + +#if 0 /* For kernel-doc purposes only. */ +/** + * pcs_get_state() - Read the current inband link state from the hardware + * @config: a pointer to a &struct phylink_config. + * @state: a pointer to a &struct phylink_link_state. + * + * Read the current inband link state from the MAC PCS, reporting the + * current speed in @state->speed, duplex mode in @state->duplex, pause + * mode in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits, + * negotiation completion state in @state->an_complete, and link up state + * in @state->link. If possible, @state->lp_advertising should also be + * populated. + * + * When present, this overrides mac_pcs_get_state() in &struct + * phylink_mac_ops. + */ +void pcs_get_state(struct phylink_config *config, + struct phylink_link_state *state); + +/** + * pcs_config() - Configure the PCS mode and advertisement + * @config: a pointer to a &struct phylink_config. + * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. + * @interface: interface mode to be used + * @advertising: adertisement ethtool link mode mask + * + * Configure the PCS for the operating mode, the interface mode, and set + * the advertisement mask. + * + * When operating in %MLO_AN_INBAND, inband should always be enabled, + * otherwise inband should be disabled. + * + * For SGMII, there is no advertisement from the MAC side, the PCS should + * be programmed to acknowledge the inband word from the PHY. + * + * For 1000BASE-X, the advertisement should be programmed into the PCS. + * + * For most 10GBASE-R, there is no advertisement. + */ +int (*pcs_config)(struct phylink_config *config, unsigned int mode, + phy_interface_t interface, const unsigned long *advertising); + +/** + * pcs_an_restart() - restart 802.3z BaseX autonegotiation + * @config: a pointer to a &struct phylink_config. + * + * When PCS ops are present, this overrides mac_an_restart() in &struct + * phylink_mac_ops. + */ +void (*pcs_an_restart)(struct phylink_config *config); + +/** + * pcs_link_up() - program the PCS for the resolved link configuration + * @config: a pointer to a &struct phylink_config. + * @mode: link autonegotiation mode + * @interface: link &typedef phy_interface_t mode + * @speed: link speed + * @duplex: link duplex + * + * This call will be made just before mac_link_up() to inform the PCS of + * the resolved link parameters. For example, a PCS operating in SGMII + * mode without in-band AN needs to be manually configured for the link + * and duplex setting. Otherwise, this should be a no-op. + */ +void (*pcs_link_up)(struct phylink_config *config, unsigned int mode, + phy_interface_t interface, int speed, int duplex); +#endif + struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); +void phylink_add_pcs(struct phylink *, const struct phylink_pcs_ops *ops); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); -- cgit v1.2.3 From 3f50f132d8400e129fc9eb68b5020167ef80a244 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 30 Mar 2020 14:36:39 -0700 Subject: bpf: Verifier, do explicit ALU32 bounds tracking It is not possible for the current verifier to track ALU32 and JMP ops correctly. This can result in the verifier aborting with errors even though the program should be verifiable. BPF codes that hit this can work around it by changin int variables to 64-bit types, marking variables volatile, etc. But this is all very ugly so it would be better to avoid these tricks. But, the main reason to address this now is do_refine_retval_range() was assuming return values could not be negative. Once we fixed this code that was previously working will no longer work. See do_refine_retval_range() patch for details. And we don't want to suddenly cause programs that used to work to fail. The simplest example code snippet that illustrates the problem is likely this, 53: w8 = w0 // r8 <- [0, S32_MAX], // w8 <- [-S32_MIN, X] 54: w8 64-bit 2. MOV ALU64 - copy 64-bit -> 32-bit 3. op ALU32 - zext 32-bit -> 64-bit 4. op ALU64 - n/a 5. jmp ALU32 - 64-bit: var32_off | upper_32_bits(var64_off) 6. jmp ALU64 - 32-bit: (>> (<< var64_off)) Details for each case, For "MOV ALU32" BPF arch zero extends so we simply copy the bounds from 32-bit into 64-bit ensuring we truncate var_off and 64-bit bounds correctly. See zext_32_to_64. For "MOV ALU64" copy all bounds including 32-bit into new register. If the src register had 32-bit bounds the dst register will as well. For "op ALU32" zero extend 32-bit into 64-bit the same as move, see zext_32_to_64. For "op ALU64" calculate both 32-bit and 64-bit bounds no merging is done here. Except we have a special case. When RSH or ARSH is done we can't simply ignore shifting bits from 64-bit reg into the 32-bit subreg. So currently just push bounds from 64-bit into 32-bit. This will be correct in the sense that they will represent a valid state of the register. However we could lose some accuracy if an ARSH is following a jmp32 operation. We can handle this special case in a follow up series. For "jmp ALU32" mark 64-bit reg unknown and recalculate 64-bit bounds from tnum by setting var_off to ((<<(>>var_off)) | var32_off). We special case if 64-bit bounds has zero'd upper 32bits at which point we can simply copy 32-bit bounds into 64-bit register. This catches a common compiler trick where upper 32-bits are zeroed and then 32-bit ops are used followed by a 64-bit compare or 64-bit op on a pointer. See __reg_combine_64_into_32(). For "jmp ALU64" cast the bounds of the 64bit to their 32-bit counterpart. For example s32_min_value = (s32)reg->smin_value. For tnum use only the lower 32bits via, (>>(< Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/158560419880.10843.11448220440809118343.stgit@john-Precision-5820-Tower --- include/linux/bpf_verifier.h | 4 ++++ include/linux/limits.h | 1 + include/linux/tnum.h | 12 ++++++++++++ 3 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5406e6e96585..6abd5a778fcd 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -123,6 +123,10 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + s32 s32_min_value; /* minimum possible (s32)value */ + s32 s32_max_value; /* maximum possible (s32)value */ + u32 u32_min_value; /* minimum possible (u32)value */ + u32 u32_max_value; /* maximum possible (u32)value */ /* parentage chain for liveness checking */ struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like diff --git a/include/linux/limits.h b/include/linux/limits.h index 76afcd24ff8c..0d3de82dd354 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -27,6 +27,7 @@ #define S16_MAX ((s16)(U16_MAX >> 1)) #define S16_MIN ((s16)(-S16_MAX - 1)) #define U32_MAX ((u32)~0U) +#define U32_MIN ((u32)0) #define S32_MAX ((s32)(U32_MAX >> 1)) #define S32_MIN ((s32)(-S32_MAX - 1)) #define U64_MAX ((u64)~0ULL) diff --git a/include/linux/tnum.h b/include/linux/tnum.h index ea627d1ab7e3..498dbcedb451 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -86,4 +86,16 @@ int tnum_strn(char *str, size_t size, struct tnum a); /* Format a tnum as tristate binary expansion */ int tnum_sbin(char *str, size_t size, struct tnum a); +/* Returns the 32-bit subreg */ +struct tnum tnum_subreg(struct tnum a); +/* Returns the tnum with the lower 32-bit subreg cleared */ +struct tnum tnum_clear_subreg(struct tnum a); +/* Returns the tnum with the lower 32-bit subreg set to value */ +struct tnum tnum_const_subreg(struct tnum a, u32 value); +/* Returns true if 32-bit subreg @a is a known constant*/ +static inline bool tnum_subreg_is_const(struct tnum a) +{ + return !(tnum_subreg(a)).mask; +} + #endif /* _LINUX_TNUM_H */ -- cgit v1.2.3 From af6eea57437a830293eab56246b6025cc7d46ee7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 29 Mar 2020 19:59:58 -0700 Subject: bpf: Implement bpf_link-based cgroup BPF program attachment Implement new sub-command to attach cgroup BPF programs and return FD-based bpf_link back on success. bpf_link, once attached to cgroup, cannot be replaced, except by owner having its FD. Cgroup bpf_link supports only BPF_F_ALLOW_MULTI semantics. Both link-based and prog-based BPF_F_ALLOW_MULTI attachments can be freely intermixed. To prevent bpf_cgroup_link from keeping cgroup alive past the point when no BPF program can be executed, implement auto-detachment of link. When cgroup_bpf_release() is called, all attached bpf_links are forced to release cgroup refcounts, but they leave bpf_link otherwise active and allocated, as well as still owning underlying bpf_prog. This is because user-space might still have FDs open and active, so bpf_link as a user-referenced object can't be freed yet. Once last active FD is closed, bpf_link will be freed and underlying bpf_prog refcount will be dropped. But cgroup refcount won't be touched, because cgroup is released already. The inherent race between bpf_cgroup_link release (from closing last FD) and cgroup_bpf_release() is resolved by both operations taking cgroup_mutex. So the only additional check required is when bpf_cgroup_link attempts to detach itself from cgroup. At that time we need to check whether there is still cgroup associated with that link. And if not, exit with success, because bpf_cgroup_link was already successfully detached. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20200330030001.2312810-2-andriin@fb.com --- include/linux/bpf-cgroup.h | 29 ++++++++++++++++++++++++----- include/linux/bpf.h | 10 +++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a7cd5c7a2509..d2d969669564 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -51,9 +51,18 @@ struct bpf_cgroup_storage { struct rcu_head rcu; }; +struct bpf_cgroup_link { + struct bpf_link link; + struct cgroup *cgroup; + enum bpf_attach_type type; +}; + +extern const struct bpf_link_ops bpf_cgroup_link_lops; + struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; + struct bpf_cgroup_link *link; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; @@ -84,20 +93,23 @@ struct cgroup_bpf { int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); -int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_prog *replace_prog, +int __cgroup_bpf_attach(struct cgroup *cgrp, + struct bpf_prog *prog, struct bpf_prog *replace_prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ -int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_prog *replace_prog, enum bpf_attach_type type, +int cgroup_bpf_attach(struct cgroup *cgrp, + struct bpf_prog *prog, struct bpf_prog *replace_prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + enum bpf_attach_type type); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -332,6 +344,7 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); +int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else @@ -354,6 +367,12 @@ static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, return -EINVAL; } +static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3bde59a8453b..56254d880293 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1082,15 +1082,23 @@ extern int sysctl_unprivileged_bpf_disabled; int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); -struct bpf_link; +struct bpf_link { + atomic64_t refcnt; + const struct bpf_link_ops *ops; + struct bpf_prog *prog; + struct work_struct work; +}; struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); + }; void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, struct bpf_prog *prog); +void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, + int link_fd); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -- cgit v1.2.3 From 0c991ebc8c69d29b7fc44db17075c5aa5253e2ab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 29 Mar 2020 19:59:59 -0700 Subject: bpf: Implement bpf_prog replacement for an active bpf_cgroup_link Add new operation (LINK_UPDATE), which allows to replace active bpf_prog from under given bpf_link. Currently this is only supported for bpf_cgroup_link, but will be extended to other kinds of bpf_links in follow-up patches. For bpf_cgroup_link, implemented functionality matches existing semantics for direct bpf_prog attachment (including BPF_F_REPLACE flag). User can either unconditionally set new bpf_prog regardless of which bpf_prog is currently active under given bpf_link, or, optionally, can specify expected active bpf_prog. If active bpf_prog doesn't match expected one, no changes are performed, old bpf_link stays intact and attached, operation returns a failure. cgroup_bpf_replace() operation is resolving race between auto-detachment and bpf_prog update in the same fashion as it's done for bpf_link detachment, except in this case update has no way of succeeding because of target cgroup marked as dying. So in this case error is returned. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200330030001.2312810-3-andriin@fb.com --- include/linux/bpf-cgroup.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d2d969669564..c11b413d5b1a 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -100,6 +100,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_cgroup_link *link, enum bpf_attach_type type); +int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, + struct bpf_prog *new_prog); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -110,6 +112,8 @@ int cgroup_bpf_attach(struct cgroup *cgrp, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); +int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog, + struct bpf_prog *new_prog); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -350,6 +354,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, #else struct bpf_prog; +struct bpf_link; struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} @@ -373,6 +378,13 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, return -EINVAL; } +static inline int cgroup_bpf_replace(struct bpf_link *link, + struct bpf_prog *old_prog, + struct bpf_prog *new_prog) +{ + return -EINVAL; +} + static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { -- cgit v1.2.3